gpuimgproc module for image processing
@@ -1,99 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }

#else

namespace cv { namespace gpu { namespace cudev
{
    namespace blend
    {
        template <typename T>
        void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);

        void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
    }
}}}

using namespace ::cv::gpu::cudev::blend;

void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
                          GpuMat& result, Stream& stream)
{
    CV_Assert(img1.size() == img2.size());
    CV_Assert(img1.type() == img2.type());
    CV_Assert(weights1.size() == img1.size());
    CV_Assert(weights2.size() == img2.size());
    CV_Assert(weights1.type() == CV_32F);
    CV_Assert(weights2.type() == CV_32F);

    const Size size = img1.size();
    const int depth = img1.depth();
    const int cn = img1.channels();

    result.create(size, CV_MAKE_TYPE(depth, cn));

    switch (depth)
    {
    case CV_8U:
        if (cn != 4)
            blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        else
            blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        break;
    case CV_32F:
        blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        break;
    default:
        CV_Error(cv::Error::StsUnsupportedFormat, "bad image depth in linear blending function");
    }
}

#endif
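For orientation, a minimal host-side sketch of how this entry point is typically driven; the wrapping function and buffer names are illustrative and not part of the commit, and it assumes the default null-stream overload declared in the module header:

// Sketch: feather-blend two images with per-pixel CV_32F weight maps.
void blendExample(const cv::Mat& img1, const cv::Mat& img2,
                  const cv::Mat& w1, const cv::Mat& w2, cv::Mat& out)
{
    cv::gpu::GpuMat d_img1(img1), d_img2(img2);  // constructor uploads to device
    cv::gpu::GpuMat d_w1(w1), d_w2(w2);          // weights must be CV_32F
    cv::gpu::GpuMat d_result;

    // result = (img1*w1 + img2*w2) / (w1 + w2 + eps), per the kernels in blend.cu
    cv::gpu::blendLinear(d_img1, d_img2, d_w1, d_w2, d_result);

    d_result.download(out);
}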
File diff suppressed because it is too large
@@ -1,199 +0,0 @@
/* (OpenCV license header, identical to the one at the top of this commit) */
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"

using namespace cv::gpu;

typedef unsigned char uchar;
typedef unsigned short ushort;

//////////////////////////////////////////////////////////////////////////////////
/// Bilateral filtering

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        __device__ __forceinline__ float norm_l1(const float& a)  { return ::fabs(a); }
        __device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); }
        __device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); }
        __device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); }

        __device__ __forceinline__ float sqr(const float& a) { return a * a; }

        template<typename T, typename B>
        __global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;

            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if (x >= src.cols || y >= src.rows)
                return;

            value_type center = saturate_cast<value_type>(src(y, x));

            value_type sum1 = VecTraits<value_type>::all(0);
            float sum2 = 0;

            int r = ksz / 2;
            float r2 = (float)(r * r);

            int tx = x - r + ksz;
            int ty = y - r + ksz;

            if (x - ksz/2 >= 0 && y - ksz/2 >= 0 && tx < src.cols && ty < src.rows)
            {
                for (int cy = y - r; cy < ty; ++cy)
                    for (int cx = x - r; cx < tx; ++cx)
                    {
                        float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
                        if (space2 > r2)
                            continue;

                        value_type value = saturate_cast<value_type>(src(cy, cx));

                        float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
                        sum1 = sum1 + weight * value;
                        sum2 = sum2 + weight;
                    }
            }
            else
            {
                for (int cy = y - r; cy < ty; ++cy)
                    for (int cx = x - r; cx < tx; ++cx)
                    {
                        float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
                        if (space2 > r2)
                            continue;

                        value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));

                        float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);

                        sum1 = sum1 + weight * value;
                        sum2 = sum2 + weight;
                    }
            }
            dst(y, x) = saturate_cast<T>(sum1 / sum2);
        }

        template<typename T, template <typename> class B>
        void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
        {
            dim3 block (32, 8);
            dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));

            B<T> b(src.rows, src.cols);

            float sigma_spatial2_inv_half = -0.5f / (sigma_spatial * sigma_spatial);
            float sigma_color2_inv_half   = -0.5f / (sigma_color * sigma_color);

            cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
            bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
            cudaSafeCall ( cudaGetLastError () );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template<typename T>
        void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
        {
            typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);

            static caller_t funcs[] =
            {
                bilateral_caller<T, BrdReflect101>,
                bilateral_caller<T, BrdReplicate>,
                bilateral_caller<T, BrdConstant>,
                bilateral_caller<T, BrdReflect>,
                bilateral_caller<T, BrdWrap>,
            };
            funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
        }
    }
}}}


#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
    template void cv::gpu::cudev::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);

OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)

//OCV_INSTANTIATE_BILATERAL_FILTER(schar)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar4)

OCV_INSTANTIATE_BILATERAL_FILTER(short)
//OCV_INSTANTIATE_BILATERAL_FILTER(short2)
OCV_INSTANTIATE_BILATERAL_FILTER(short3)
OCV_INSTANTIATE_BILATERAL_FILTER(short4)

OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)

//OCV_INSTANTIATE_BILATERAL_FILTER(int)
//OCV_INSTANTIATE_BILATERAL_FILTER(int2)
//OCV_INSTANTIATE_BILATERAL_FILTER(int3)
//OCV_INSTANTIATE_BILATERAL_FILTER(int4)

OCV_INSTANTIATE_BILATERAL_FILTER(float)
//OCV_INSTANTIATE_BILATERAL_FILTER(float2)
OCV_INSTANTIATE_BILATERAL_FILTER(float3)
OCV_INSTANTIATE_BILATERAL_FILTER(float4)


#endif /* CUDA_DISABLER */
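The two *_inv_half factors fold both Gaussian denominators into a single exp; as a reference, a scalar CPU transliteration of the per-neighbor weight the kernel evaluates (single channel for brevity, names mirror the kernel):

#include <cmath>

// w = exp(-d2 / (2*sigma_spatial^2)) * exp(-|v - c|^2 / (2*sigma_color^2)),
// written as one exp() with the -0.5/sigma^2 factors precomputed, exactly
// as bilateral_caller passes them to bilateral_kernel.
float bilateralWeight(float space2, float value, float center,
                      float sigma_spatial, float sigma_color)
{
    const float ss = -0.5f / (sigma_spatial * sigma_spatial);
    const float sc = -0.5f / (sigma_color * sigma_color);
    const float dc = std::fabs(value - center);   // norm_l1 for one channel
    return std::exp(space2 * ss + dc * dc * sc);
}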
@@ -1,121 +0,0 @@
/* (OpenCV license header, identical to the one at the top of this commit) */
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace blend
    {
        template <typename T>
        __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
                                          const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
        {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y < rows && x < cols)
            {
                int x_ = x / cn;
                float w1 = weights1.ptr(y)[x_];
                float w2 = weights2.ptr(y)[x_];
                T p1 = img1.ptr(y)[x];
                T p2 = img2.ptr(y)[x];
                result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
            }
        }

        template <typename T>
        void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

            blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }

        template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
        template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);


        __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
                                              const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
        {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y < rows && x < cols)
            {
                float w1 = weights1.ptr(y)[x];
                float w2 = weights2.ptr(y)[x];
                float sum_inv = 1.f / (w1 + w2 + 1e-5f);
                w1 *= sum_inv;
                w2 *= sum_inv;
                uchar4 p1 = ((const uchar4*)img1.ptr(y))[x];
                uchar4 p2 = ((const uchar4*)img2.ptr(y))[x];
                ((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
                                                          p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
            }
        }

        void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace blend
}}} // namespace cv { namespace gpu { namespace cudev


#endif /* CUDA_DISABLER */
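Note how the generic caller flattens channels into the x dimension: each thread owns one scalar lane of the row, and x / cn recovers the pixel column for the per-pixel weight lookup. A small standalone check of that index math, with illustrative sizes:

#include <cstdio>

// divUp as used by the callers: ceiling division for grid sizing.
static int divUp(int total, int grain) { return (total + grain - 1) / grain; }

int main()
{
    const int rows = 480, cols = 640, cn = 3;   // hypothetical CV_8UC3 image
    const int threads_x = 16, threads_y = 16;

    // One thread per scalar lane: x spans cols*cn, y spans rows.
    const int grid_x = divUp(cols * cn, threads_x);
    const int grid_y = divUp(rows, threads_y);

    const int x = 1000;               // some lane index within a row
    const int pixel_col = x / cn;     // weights are per pixel, not per lane
    std::printf("grid = %dx%d, lane %d -> pixel column %d\n",
                grid_x, grid_y, x, pixel_col);
    return 0;
}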
@@ -1,494 +0,0 @@
/* (OpenCV license header, identical to the one at the top of this commit) */
#if !defined CUDA_DISABLER

#include <utility>
#include <algorithm> // std::swap
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"

using namespace cv::gpu;
using namespace cv::gpu::cudev;

namespace canny
{
    struct L1 : binary_function<int, int, float>
    {
        __device__ __forceinline__ float operator ()(int x, int y) const
        {
            return ::abs(x) + ::abs(y);
        }

        __device__ __forceinline__ L1() {}
        __device__ __forceinline__ L1(const L1&) {}
    };
    struct L2 : binary_function<int, int, float>
    {
        __device__ __forceinline__ float operator ()(int x, int y) const
        {
            return ::sqrtf(x * x + y * y);
        }

        __device__ __forceinline__ L2() {}
        __device__ __forceinline__ L2(const L2&) {}
    };
}

namespace cv { namespace gpu { namespace cudev
{
    template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
    {
        enum { smart_shift = 4 };
    };
    template <> struct TransformFunctorTraits<canny::L2> : DefaultTransformFunctorTraits<canny::L2>
    {
        enum { smart_shift = 4 };
    };
}}}

namespace canny
{
    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
    struct SrcTex
    {
        const int xoff;
        const int yoff;
        __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}

        __device__ __forceinline__ int operator ()(int y, int x) const
        {
            return tex2D(tex_src, x + xoff, y + yoff);
        }
    };

    template <class Norm> __global__
    void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (y >= mag.rows || x >= mag.cols)
            return;

        int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1));
        int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1));

        dx(y, x) = dxVal;
        dy(y, x) = dyVal;

        mag(y, x) = norm(dxVal, dyVal);
    }

    void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
    {
        const dim3 block(16, 16);
        const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));

        bindTexture(&tex_src, srcWhole);
        SrcTex src(xoff, yoff);

        if (L2Grad)
        {
            L2 norm;
            calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
        }
        else
        {
            L1 norm;
            calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
        }

        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaThreadSynchronize() );
    }

    void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
    {
        if (L2Grad)
        {
            L2 norm;
            transform(dx, dy, mag, norm, WithOutMask(), 0);
        }
        else
        {
            L1 norm;
            transform(dx, dy, mag, norm, WithOutMask(), 0);
        }
    }
}

//////////////////////////////////////////////////////////////////////////////////////////

namespace canny
{
    texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);

    __global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
    {
        const int CANNY_SHIFT = 15;
        const int TG22 = (int)(0.4142135623730950488016887242097 * (1 << CANNY_SHIFT) + 0.5);

        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x == 0 || x >= dx.cols - 1 || y == 0 || y >= dx.rows - 1)
            return;

        int dxVal = dx(y, x);
        int dyVal = dy(y, x);

        const int s = (dxVal ^ dyVal) < 0 ? -1 : 1;
        const float m = tex2D(tex_mag, x, y);

        dxVal = ::abs(dxVal);
        dyVal = ::abs(dyVal);

        // 0 - the pixel can not belong to an edge
        // 1 - the pixel might belong to an edge
        // 2 - the pixel does belong to an edge
        int edge_type = 0;

        if (m > low_thresh)
        {
            const int tg22x = dxVal * TG22;
            const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);

            dyVal <<= CANNY_SHIFT;

            if (dyVal < tg22x)
            {
                if (m > tex2D(tex_mag, x - 1, y) && m >= tex2D(tex_mag, x + 1, y))
                    edge_type = 1 + (int)(m > high_thresh);
            }
            else if (dyVal > tg67x)
            {
                if (m > tex2D(tex_mag, x, y - 1) && m >= tex2D(tex_mag, x, y + 1))
                    edge_type = 1 + (int)(m > high_thresh);
            }
            else
            {
                if (m > tex2D(tex_mag, x - s, y - 1) && m >= tex2D(tex_mag, x + s, y + 1))
                    edge_type = 1 + (int)(m > high_thresh);
            }
        }

        map(y, x) = edge_type;
    }

    void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
    {
        const dim3 block(16, 16);
        const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));

        bindTexture(&tex_mag, mag);

        calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
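The sector test above works in fixed point: with CANNY_SHIFT = 15, TG22 is tan(22.5 deg) scaled by 2^15, and tg67x adds 2*dx*2^15 since tan(67.5 deg) = tan(22.5 deg) + 2. A scalar check of the same classification, widened to long long to sidestep the overflow the kernel avoids only through Sobel's bounded range (illustrative, not from the commit):

#include <cstdio>

// Fixed-point gradient-direction sector test, mirroring calcMapKernel.
// dx, dy are already absolute values. Returns 0: horizontal neighbors,
// 1: diagonal neighbors, 2: vertical neighbors for the NMS comparison.
int sector(int dx, int dy)
{
    const int CANNY_SHIFT = 15;
    const int TG22 = (int)(0.4142135623730950488016887242097 * (1 << CANNY_SHIFT) + 0.5);

    const long long tg22x = (long long)dx * TG22;
    const long long tg67x = tg22x + (((long long)dx + dx) << CANNY_SHIFT);
    const long long dys   = (long long)dy << CANNY_SHIFT;

    if (dys < tg22x) return 0;   // near-horizontal gradient: compare left/right
    if (dys > tg67x) return 2;   // near-vertical gradient: compare up/down
    return 1;                    // diagonal: compare along the signed diagonal
}

int main() { std::printf("%d %d %d\n", sector(100, 10), sector(10, 100), sector(100, 80)); return 0; }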

//////////////////////////////////////////////////////////////////////////////////////////

namespace canny
{
    __device__ int counter = 0;

    __global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st)
    {
        __shared__ volatile int smem[18][18];

        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? map(y, x) : 0;
        if (threadIdx.y == 0)
            smem[0][threadIdx.x + 1] = y > 0 ? map(y - 1, x) : 0;
        if (threadIdx.y == blockDim.y - 1)
            smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? map(y + 1, x) : 0;
        if (threadIdx.x == 0)
            smem[threadIdx.y + 1][0] = x > 0 ? map(y, x - 1) : 0;
        if (threadIdx.x == blockDim.x - 1)
            smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? map(y, x + 1) : 0;
        if (threadIdx.x == 0 && threadIdx.y == 0)
            smem[0][0] = y > 0 && x > 0 ? map(y - 1, x - 1) : 0;
        if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
            smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? map(y - 1, x + 1) : 0;
        if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1)
            smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? map(y + 1, x - 1) : 0;
        if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1)
            smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? map(y + 1, x + 1) : 0;

        __syncthreads();

        if (x >= map.cols || y >= map.rows)
            return;

        int n;

        #pragma unroll
        for (int k = 0; k < 16; ++k)
        {
            n = 0;

            if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1)
            {
                n += smem[threadIdx.y    ][threadIdx.x    ] == 2;
                n += smem[threadIdx.y    ][threadIdx.x + 1] == 2;
                n += smem[threadIdx.y    ][threadIdx.x + 2] == 2;

                n += smem[threadIdx.y + 1][threadIdx.x    ] == 2;
                n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;

                n += smem[threadIdx.y + 2][threadIdx.x    ] == 2;
                n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
                n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
            }

            if (n > 0)
                smem[threadIdx.y + 1][threadIdx.x + 1] = 2;
        }

        const int e = smem[threadIdx.y + 1][threadIdx.x + 1];

        map(y, x) = e;

        n = 0;

        if (e == 2)
        {
            n += smem[threadIdx.y    ][threadIdx.x    ] == 1;
            n += smem[threadIdx.y    ][threadIdx.x + 1] == 1;
            n += smem[threadIdx.y    ][threadIdx.x + 2] == 1;

            n += smem[threadIdx.y + 1][threadIdx.x    ] == 1;
            n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;

            n += smem[threadIdx.y + 2][threadIdx.x    ] == 1;
            n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
            n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
        }

        if (n > 0)
        {
            const int ind = ::atomicAdd(&counter, 1);
            st[ind] = make_ushort2(x, y);
        }
    }

    void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
    {
        void* counter_ptr;
        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );

        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

        const dim3 block(16, 16);
        const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));

        edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );
    }
}

//////////////////////////////////////////////////////////////////////////////////////////

namespace canny
{
    __constant__ int c_dx[8] = {-1,  0,  1, -1,  1, -1,  0,  1};
    __constant__ int c_dy[8] = {-1, -1, -1,  0,  0,  1,  1,  1};

    __global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
    {
        const int stack_size = 512;

        __shared__ int s_counter;
        __shared__ int s_ind;
        __shared__ ushort2 s_st[stack_size];

        if (threadIdx.x == 0)
            s_counter = 0;

        __syncthreads();

        int ind = blockIdx.y * gridDim.x + blockIdx.x;

        if (ind >= count)
            return;

        ushort2 pos = st1[ind];

        if (threadIdx.x < 8)
        {
            pos.x += c_dx[threadIdx.x];
            pos.y += c_dy[threadIdx.x];

            if (pos.x > 0 && pos.x < map.cols && pos.y > 0 && pos.y < map.rows && map(pos.y, pos.x) == 1)
            {
                map(pos.y, pos.x) = 2;

                ind = Emulation::smem::atomicAdd(&s_counter, 1);

                s_st[ind] = pos;
            }
        }

        __syncthreads();

        while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
        {
            const int subTaskIdx = threadIdx.x >> 3;
            const int portion = ::min(s_counter, blockDim.x >> 3);

            if (subTaskIdx < portion)
                pos = s_st[s_counter - 1 - subTaskIdx];

            __syncthreads();

            if (threadIdx.x == 0)
                s_counter -= portion;

            __syncthreads();

            if (subTaskIdx < portion)
            {
                pos.x += c_dx[threadIdx.x & 7];
                pos.y += c_dy[threadIdx.x & 7];

                if (pos.x > 0 && pos.x < map.cols && pos.y > 0 && pos.y < map.rows && map(pos.y, pos.x) == 1)
                {
                    map(pos.y, pos.x) = 2;

                    ind = Emulation::smem::atomicAdd(&s_counter, 1);

                    s_st[ind] = pos;
                }
            }

            __syncthreads();
        }

        if (s_counter > 0)
        {
            if (threadIdx.x == 0)
            {
                ind = ::atomicAdd(&counter, s_counter);
                s_ind = ind - s_counter;
            }

            __syncthreads();

            ind = s_ind;

            for (int i = threadIdx.x; i < s_counter; i += blockDim.x)
                st2[ind + i] = s_st[i];
        }
    }

    void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
    {
        void* counter_ptr;
        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );

        int count;
        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

        while (count > 0)
        {
            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

            const dim3 block(128);
            const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);

            edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );

            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

            std::swap(st1, st2);
        }
    }
}

//////////////////////////////////////////////////////////////////////////////////////////

namespace canny
{
    struct GetEdges : unary_function<int, uchar>
    {
        __device__ __forceinline__ uchar operator ()(int e) const
        {
            return (uchar)(-(e >> 1));
        }

        __device__ __forceinline__ GetEdges() {}
        __device__ __forceinline__ GetEdges(const GetEdges&) {}
    };
}

namespace cv { namespace gpu { namespace cudev
{
    template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
    {
        enum { smart_shift = 4 };
    };
}}}

namespace canny
{
    void getEdges(PtrStepSzi map, PtrStepSzb dst)
    {
        transform(map, dst, GetEdges(), WithOutMask(), 0);
    }
}

#endif /* CUDA_DISABLER */
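For context, the stages exported by this file line up as the classic Canny pipeline; a hypothetical host driver (buffer names invented, allocation elided, plain type names rely on the file's using-directives) would call them in this order:

// Hypothetical driver showing the stage order; dx/dy/mag/map are
// pre-allocated device buffers, st1/st2 scratch stacks of candidate
// edge coordinates (one ushort2 per pixel in the worst case).
void cannySketch(PtrStepSzb srcWhole, int xoff, int yoff,
                 PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag,
                 PtrStepSzi map, PtrStepSzb dst,
                 ushort2* st1, ushort2* st2,
                 float low_thresh, float high_thresh, bool L2grad)
{
    canny::calcMagnitude(srcWhole, xoff, yoff, dx, dy, mag, L2grad); // Sobel + norm
    canny::calcMap(dx, dy, mag, map, low_thresh, high_thresh);       // NMS + double threshold
    canny::edgesHysteresisLocal(map, st1);                           // grow edges within blocks
    canny::edgesHysteresisGlobal(map, st1, st2);                     // grow edges across blocks
    canny::getEdges(map, dst);                                       // map == 2  ->  255
}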
@@ -1,534 +0,0 @@
/* (OpenCV license header, identical to the one at the top of this commit) */
#if !defined CUDA_DISABLER

#include <opencv2/core/cuda/common.hpp>
#include <opencv2/core/cuda/vec_traits.hpp>
#include <opencv2/core/cuda/vec_math.hpp>
#include <opencv2/core/cuda/emulation.hpp>

#include <iostream>
#include <stdio.h>

namespace cv { namespace gpu { namespace cudev
{
    namespace ccl
    {
        enum
        {
            WARP_SIZE  = 32,
            WARP_LOG   = 5,

            CTA_SIZE_X = 32,
            CTA_SIZE_Y = 8,

            STA_SIZE_MERGE_Y = 4,
            STA_SIZE_MERGE_X = 32,

            TPB_X = 1,
            TPB_Y = 4,

            TILE_COLS = CTA_SIZE_X * TPB_X,
            TILE_ROWS = CTA_SIZE_Y * TPB_Y
        };

        template<typename T> struct IntervalsTraits
        {
            typedef T elem_type;
        };

        template<> struct IntervalsTraits<unsigned char>
        {
            typedef int dist_type;
            enum {ch = 1};
        };

        template<> struct IntervalsTraits<uchar3>
        {
            typedef int3 dist_type;
            enum {ch = 3};
        };

        template<> struct IntervalsTraits<uchar4>
        {
            typedef int4 dist_type;
            enum {ch = 4};
        };

        template<> struct IntervalsTraits<unsigned short>
        {
            typedef int dist_type;
            enum {ch = 1};
        };

        template<> struct IntervalsTraits<ushort3>
        {
            typedef int3 dist_type;
            enum {ch = 3};
        };

        template<> struct IntervalsTraits<ushort4>
        {
            typedef int4 dist_type;
            enum {ch = 4};
        };

        template<> struct IntervalsTraits<float>
        {
            typedef float dist_type;
            enum {ch = 1};
        };

        template<> struct IntervalsTraits<int>
        {
            typedef int dist_type;
            enum {ch = 1};
        };

        typedef unsigned char component;
        enum Edges { UP = 1, DOWN = 2, LEFT = 4, RIGHT = 8, EMPTY = 0xF0 };

        template<typename T, int CH> struct InInterval {};

        template<typename T> struct InInterval<T, 1>
        {
            typedef typename VecTraits<T>::elem_type E;
            __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) : lo((E)(-_lo.x)), hi((E)_hi.x) {}
            T lo, hi;

            template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
            {
                I d = a - b;
                return lo <= d && d <= hi;
            }
        };


        template<typename T> struct InInterval<T, 3>
        {
            typedef typename VecTraits<T>::elem_type E;
            __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi)
                : lo (VecTraits<T>::make((E)(-_lo.x), (E)(-_lo.y), (E)(-_lo.z))), hi (VecTraits<T>::make((E)_hi.x, (E)_hi.y, (E)_hi.z)) {}
            T lo, hi;

            template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
            {
                I d = a - b;
                return lo.x <= d.x && d.x <= hi.x &&
                       lo.y <= d.y && d.y <= hi.y &&
                       lo.z <= d.z && d.z <= hi.z;
            }
        };

        template<typename T> struct InInterval<T, 4>
        {
            typedef typename VecTraits<T>::elem_type E;
            __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi)
                : lo (VecTraits<T>::make((E)(-_lo.x), (E)(-_lo.y), (E)(-_lo.z), (E)(-_lo.w))), hi (VecTraits<T>::make((E)_hi.x, (E)_hi.y, (E)_hi.z, (E)_hi.w)) {}
            T lo, hi;

            template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
            {
                I d = a - b;
                return lo.x <= d.x && d.x <= hi.x &&
                       lo.y <= d.y && d.y <= hi.y &&
                       lo.z <= d.z && d.z <= hi.z &&
                       lo.w <= d.w && d.w <= hi.w;
            }
        };


        template<typename T, typename F>
        __global__ void computeConnectivity(const PtrStepSz<T> image, PtrStepSzb components, F connected)
        {
            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if (x >= image.cols || y >= image.rows) return;

            T intensity = image(y, x);
            component c = 0;

            if ( x > 0 && connected(intensity, image(y, x - 1)))
                c |= LEFT;

            if ( y > 0 && connected(intensity, image(y - 1, x)))
                c |= UP;

            if ( x + 1 < image.cols && connected(intensity, image(y, x + 1)))
                c |= RIGHT;

            if ( y + 1 < image.rows && connected(intensity, image(y + 1, x)))
                c |= DOWN;

            components(y, x) = c;
        }

        template< typename T>
        void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream)
        {
            dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
            dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y));

            typedef InInterval<typename IntervalsTraits<T>::dist_type, IntervalsTraits<T>::ch> Int_t;

            Int_t inInt(lo, hi);
            computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const PtrStepSz<T> >(image), edges, inInt);

            cudaSafeCall( cudaGetLastError() );
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void computeEdges<uchar>  (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<uchar3> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<uchar4> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<ushort> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<ushort3>(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<ushort4>(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<int>    (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
        template void computeEdges<float>  (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
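Each pixel's connectivity is packed into the low four bits of one byte, a bit per neighbor whose difference passes the interval test; a scalar sketch of that encoding and of the single-channel predicate (enum names prefixed to avoid clashing with the real Edges above, not from the commit):

// Sketch: the 4-bit connectivity mask written by computeConnectivity,
// and the scalar interval test that decides it (cf. InInterval<T, 1>).
enum EdgeBits { E_UP = 1, E_DOWN = 2, E_LEFT = 4, E_RIGHT = 8 };

bool inInterval(int a, int b, int lo /* already-negated lower bound */, int hi)
{
    const int d = a - b;
    return lo <= d && d <= hi;
}

unsigned char maskFor(bool left, bool up, bool right, bool down)
{
    unsigned char c = 0;
    if (left)  c |= E_LEFT;
    if (up)    c |= E_UP;
    if (right) c |= E_RIGHT;
    if (down)  c |= E_DOWN;
    return c;
}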

        __global__ void lableTiles(const PtrStepSzb edges, PtrStepSzi comps)
        {
            int x = threadIdx.x + blockIdx.x * TILE_COLS;
            int y = threadIdx.y + blockIdx.y * TILE_ROWS;

            if (x >= edges.cols || y >= edges.rows) return;

            // currently TPB_X is 1
            int bounds = ((y + TPB_Y) < edges.rows);

            __shared__ int labelsTile[TILE_ROWS][TILE_COLS];
            __shared__ int  edgesTile[TILE_ROWS][TILE_COLS];

            int new_labels[TPB_Y][TPB_X];
            int old_labels[TPB_Y][TPB_X];

            #pragma unroll
            for (int i = 0; i < TPB_Y; ++i)
                #pragma unroll
                for (int j = 0; j < TPB_X; ++j)
                {
                    int yloc = threadIdx.y + CTA_SIZE_Y * i;
                    int xloc = threadIdx.x + CTA_SIZE_X * j;
                    component c = edges(bounds * (y + CTA_SIZE_Y * i), x + CTA_SIZE_X * j);

                    if (!xloc) c &= ~LEFT;
                    if (!yloc) c &= ~UP;

                    if (xloc == TILE_COLS - 1) c &= ~RIGHT;
                    if (yloc == TILE_ROWS - 1) c &= ~DOWN;

                    new_labels[i][j] = yloc * TILE_COLS + xloc;
                    edgesTile[yloc][xloc] = c;
                }

            for (int k = 0; ; ++k)
            {
                //1. backup
                #pragma unroll
                for (int i = 0; i < TPB_Y; ++i)
                    #pragma unroll
                    for (int j = 0; j < TPB_X; ++j)
                    {
                        int yloc = threadIdx.y + CTA_SIZE_Y * i;
                        int xloc = threadIdx.x + CTA_SIZE_X * j;

                        old_labels[i][j] = new_labels[i][j];
                        labelsTile[yloc][xloc] = new_labels[i][j];
                    }

                __syncthreads();

                //2. compare local arrays
                #pragma unroll
                for (int i = 0; i < TPB_Y; ++i)
                    #pragma unroll
                    for (int j = 0; j < TPB_X; ++j)
                    {
                        int yloc = threadIdx.y + CTA_SIZE_Y * i;
                        int xloc = threadIdx.x + CTA_SIZE_X * j;

                        component c = edgesTile[yloc][xloc];
                        int label = new_labels[i][j];

                        if (c & UP)
                            label = ::min(label, labelsTile[yloc - 1][xloc]);

                        if (c & DOWN)
                            label = ::min(label, labelsTile[yloc + 1][xloc]);

                        if (c & LEFT)
                            label = ::min(label, labelsTile[yloc][xloc - 1]);

                        if (c & RIGHT)
                            label = ::min(label, labelsTile[yloc][xloc + 1]);

                        new_labels[i][j] = label;
                    }

                __syncthreads();

                //3. determine: Is any value changed?
                int changed = 0;
                #pragma unroll
                for (int i = 0; i < TPB_Y; ++i)
                    #pragma unroll
                    for (int j = 0; j < TPB_X; ++j)
                    {
                        if (new_labels[i][j] < old_labels[i][j])
                        {
                            changed = 1;
                            Emulation::smem::atomicMin(&labelsTile[0][0] + old_labels[i][j], new_labels[i][j]);
                        }
                    }

                changed = Emulation::syncthreadsOr(changed);

                if (!changed)
                    break;

                //4. Compact paths
                const int *labels = &labelsTile[0][0];
                #pragma unroll
                for (int i = 0; i < TPB_Y; ++i)
                    #pragma unroll
                    for (int j = 0; j < TPB_X; ++j)
                    {
                        int label = new_labels[i][j];

                        while( labels[label] < label ) label = labels[label];

                        new_labels[i][j] = label;
                    }
                __syncthreads();
            }

            #pragma unroll
            for (int i = 0; i < TPB_Y; ++i)
                #pragma unroll
                for (int j = 0; j < TPB_X; ++j)
                {
                    int label = new_labels[i][j];
                    int yloc = label / TILE_COLS;
                    int xloc = label - yloc * TILE_COLS;

                    xloc += blockIdx.x * TILE_COLS;
                    yloc += blockIdx.y * TILE_ROWS;

                    label = yloc * edges.cols + xloc;
                    // do it for x too.
                    if (y + CTA_SIZE_Y * i < comps.rows) comps(y + CTA_SIZE_Y * i, x + CTA_SIZE_X * j) = label;
                }
        }

        __device__ __forceinline__ int root(const PtrStepSzi& comps, int label)
        {
            while (1)
            {
                int y = label / comps.cols;
                int x = label - y * comps.cols;

                int parent = comps(y, x);

                if (label == parent) break;

                label = parent;
            }
            return label;
        }

        __device__ __forceinline__ void isConnected(PtrStepSzi& comps, int l1, int l2, bool& changed)
        {
            int r1 = root(comps, l1);
            int r2 = root(comps, l2);

            if (r1 == r2) return;

            int mi = ::min(r1, r2);
            int ma = ::max(r1, r2);

            int y = ma / comps.cols;
            int x = ma - y * comps.cols;

            atomicMin(&comps.ptr(y)[x], mi);
            changed = true;
        }
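root chases parent links stored in-place in the label image (each label is the row-major index of its parent; a root is its own parent), and isConnected unites two trees by atomicMin-ing the larger root. For reference, a host transliteration of root over a flat array (assumed dense layout, not from the commit):

// Host transliteration of root(): comps holds, per pixel, the row-major
// index of its parent pixel; a root points at itself.
int rootHost(const int* comps, int cols, int label)
{
    (void)cols;   // the device code splits label into (y, x); dense storage
                  // makes that equivalent to direct indexing
    for (;;)
    {
        const int parent = comps[label];
        if (parent == label)
            return label;
        label = parent;
    }
}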

        __global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX,
                                   const PtrStepSzb edges, PtrStepSzi comps, const int yIncomplete, int xIncomplete)
        {
            int tid = threadIdx.y * blockDim.x + threadIdx.x;
            int stride = blockDim.y * blockDim.x;

            int ybegin = blockIdx.y * (tilesNumY * tileSizeY);
            int yend   = ybegin + tilesNumY * tileSizeY;

            if (blockIdx.y == gridDim.y - 1)
            {
                yend -= yIncomplete * tileSizeY;
                yend -= tileSizeY;
                tileSizeY = (edges.rows % tileSizeY);

                yend += tileSizeY;
            }

            int xbegin = blockIdx.x * tilesNumX * tileSizeX;
            int xend   = xbegin + tilesNumX * tileSizeX;

            if (blockIdx.x == gridDim.x - 1)
            {
                if (xIncomplete) yend = ybegin;
                xend -= xIncomplete * tileSizeX;
                xend -= tileSizeX;
                tileSizeX = (edges.cols % tileSizeX);

                xend += tileSizeX;
            }

            if (blockIdx.y == (gridDim.y - 1) && yIncomplete)
            {
                xend = xbegin;
            }

            int tasksV = (tilesNumX - 1) * (yend - ybegin);
            int tasksH = (tilesNumY - 1) * (xend - xbegin);

            int total = tasksH + tasksV;

            bool changed;
            do
            {
                changed = false;
                for (int taskIdx = tid; taskIdx < total; taskIdx += stride)
                {
                    if (taskIdx < tasksH)
                    {
                        int indexH = taskIdx;

                        int row = indexH / (xend - xbegin);
                        int col = indexH - row * (xend - xbegin);

                        int y = ybegin + (row + 1) * tileSizeY;
                        int x = xbegin + col;

                        component e = edges(x, y);
                        if (e & UP)
                        {
                            int lc = comps(y, x);
                            int lu = comps(y - 1, x);

                            isConnected(comps, lc, lu, changed);
                        }
                    }
                    else
                    {
                        int indexV = taskIdx - tasksH;

                        int col = indexV / (yend - ybegin);
                        int row = indexV - col * (yend - ybegin);

                        int x = xbegin + (col + 1) * tileSizeX;
                        int y = ybegin + row;

                        component e = edges(x, y);
                        if (e & LEFT)
                        {
                            int lc = comps(y, x);
                            int ll = comps(y, x - 1);

                            isConnected(comps, lc, ll, changed);
                        }
                    }
                }
            } while (Emulation::syncthreadsOr(changed));
        }

        __global__ void flatten(const PtrStepSzb edges, PtrStepSzi comps)
        {
            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if( x < comps.cols && y < comps.rows)
                comps(y, x) = root(comps, comps(y, x));
        }

        enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1};

        void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream)
        {
            (void) flags;
            dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
            dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));

            lableTiles<<<grid, block, 0, stream>>>(edges, comps);
            cudaSafeCall( cudaGetLastError() );

            int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS;
            while (grid.x > 1 || grid.y > 1)
            {
                dim3 mergeGrid((int)ceilf(grid.x / 2.f), (int)ceilf(grid.y / 2.f));
                dim3 mergeBlock(STA_SIZE_MERGE_X, STA_SIZE_MERGE_Y);
                // debug log
                // std::cout << "merging: " << grid.y << " x " << grid.x << " ---> " << mergeGrid.y << " x " << mergeGrid.x << " for tiles: " << tileSizeY << " x " << tileSizeX << std::endl;
                crossMerge<<<mergeGrid, mergeBlock, 0, stream>>>(2, 2, tileSizeY, tileSizeX, edges, comps, (int)ceilf(grid.y / 2.f) - grid.y / 2, (int)ceilf(grid.x / 2.f) - grid.x / 2);
                tileSizeX <<= 1;
                tileSizeY <<= 1;
                grid = mergeGrid;

                cudaSafeCall( cudaGetLastError() );
            }

            grid.x = divUp(edges.cols, block.x);
            grid.y = divUp(edges.rows, block.y);
            flatten<<<grid, block, 0, stream>>>(edges, comps);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
} } }

#endif /* CUDA_DISABLER */
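The merge phase halves the tile grid and doubles the tile size each pass until one tile covers the whole image; a tiny host-side model of that schedule (image size is illustrative):

#include <cstdio>

// Model of the tile-merge schedule in labelComponents: each pass merges
// 2x2 groups of tiles, so the grid halves (rounding up) and tiles double.
int main()
{
    int gx = 20, gy = 15;   // initial tile grid for a 640x480 image
    int tx = 32, ty = 32;   // TILE_COLS x TILE_ROWS (32x1, 8x4 => 32x32)
    while (gx > 1 || gy > 1)
    {
        gx = (gx + 1) / 2;  // (int)ceilf(grid.x / 2.f)
        gy = (gy + 1) / 2;
        tx <<= 1;
        ty <<= 1;
        std::printf("grid %dx%d, tile %dx%d\n", gx, gy, tx, ty);
    }
    return 0;
}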
@@ -1,186 +0,0 @@
/* (OpenCV license header, identical to the one at the top of this commit) */
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/scan.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"

using namespace cv::gpu;
using namespace cv::gpu::cudev;

namespace clahe
{
    __global__ void calcLutKernel(const PtrStepb src, PtrStepb lut,
                                  const int2 tileSize, const int tilesX,
                                  const int clipLimit, const float lutScale)
    {
        __shared__ int smem[512];

        const int tx = blockIdx.x;
        const int ty = blockIdx.y;
        const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;

        smem[tid] = 0;
        __syncthreads();

        for (int i = threadIdx.y; i < tileSize.y; i += blockDim.y)
        {
            const uchar* srcPtr = src.ptr(ty * tileSize.y + i) + tx * tileSize.x;
            for (int j = threadIdx.x; j < tileSize.x; j += blockDim.x)
            {
                const int data = srcPtr[j];
                Emulation::smem::atomicAdd(&smem[data], 1);
            }
        }

        __syncthreads();

        int tHistVal = smem[tid];

        __syncthreads();

        if (clipLimit > 0)
        {
            // clip histogram bar

            int clipped = 0;
            if (tHistVal > clipLimit)
            {
                clipped = tHistVal - clipLimit;
                tHistVal = clipLimit;
            }

            // find number of overall clipped samples

            reduce<256>(smem, clipped, tid, plus<int>());

            // broadcast evaluated value

            __shared__ int totalClipped;

            if (tid == 0)
                totalClipped = clipped;
            __syncthreads();

            // redistribute clipped samples evenly

            int redistBatch = totalClipped / 256;
            tHistVal += redistBatch;

            int residual = totalClipped - redistBatch * 256;
            if (tid < residual)
                ++tHistVal;
        }

        const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);

        lut(ty * tilesX + tx, tid) = saturate_cast<uchar>(__float2int_rn(lutScale * lutVal));
    }

    void calcLut(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(tilesX, tilesY);

        calcLutKernel<<<grid, block, 0, stream>>>(src, lut, tileSize, tilesX, clipLimit, lutScale);

        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
|
||||
|
||||
__global__ void tranformKernel(const PtrStepSzb src, PtrStepb dst, const PtrStepb lut, const int2 tileSize, const int tilesX, const int tilesY)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x >= src.cols || y >= src.rows)
|
||||
return;
|
||||
|
||||
const float tyf = (static_cast<float>(y) / tileSize.y) - 0.5f;
|
||||
int ty1 = __float2int_rd(tyf);
|
||||
int ty2 = ty1 + 1;
|
||||
const float ya = tyf - ty1;
|
||||
ty1 = ::max(ty1, 0);
|
||||
ty2 = ::min(ty2, tilesY - 1);
|
||||
|
||||
const float txf = (static_cast<float>(x) / tileSize.x) - 0.5f;
|
||||
int tx1 = __float2int_rd(txf);
|
||||
int tx2 = tx1 + 1;
|
||||
const float xa = txf - tx1;
|
||||
tx1 = ::max(tx1, 0);
|
||||
tx2 = ::min(tx2, tilesX - 1);
|
||||
|
||||
const int srcVal = src(y, x);
|
||||
|
||||
float res = 0;
|
||||
|
||||
res += lut(ty1 * tilesX + tx1, srcVal) * ((1.0f - xa) * (1.0f - ya));
|
||||
res += lut(ty1 * tilesX + tx2, srcVal) * ((xa) * (1.0f - ya));
|
||||
res += lut(ty2 * tilesX + tx1, srcVal) * ((1.0f - xa) * (ya));
|
||||
res += lut(ty2 * tilesX + tx2, srcVal) * ((xa) * (ya));
|
||||
|
||||
dst(y, x) = saturate_cast<uchar>(res);
|
||||
}
|
||||
|
||||
void transform(PtrStepSzb src, PtrStepSzb dst, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(tranformKernel, cudaFuncCachePreferL1) );
|
||||
|
||||
tranformKernel<<<grid, block, 0, stream>>>(src, dst, lut, tileSize, tilesX, tilesY);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
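
// A minimal host-side sketch of how the two entry points above combine,
// assuming a lut buffer of size (tilesX * tilesY) x 256 and the usual CLAHE
// clip-limit normalization; the wrapper itself and its names are illustrative,
// not part of this file.
static void applyCLAHE(PtrStepSzb src, PtrStepSzb dst, PtrStepSzb lut,
                       int tilesX, int tilesY, double clipLimit, cudaStream_t stream)
{
    const int2 tileSize = make_int2(src.cols / tilesX, src.rows / tilesY);
    const int tileSizeTotal = tileSize.x * tileSize.y;
    const float lutScale = static_cast<float>(255.0f / tileSizeTotal); // maps each tile CDF to [0, 255]

    int clip = 0;
    if (clipLimit > 0.0)
        clip = std::max(static_cast<int>(clipLimit * tileSizeTotal / 256.0), 1);

    clahe::calcLut(src, lut, tilesX, tilesY, tileSize, clip, lutScale, stream); // per-tile clipped LUTs
    clahe::transform(src, dst, lut, tilesX, tilesY, tileSize, stream);          // bilinear LUT interpolation
}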
@@ -1,461 +0,0 @@
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/color.hpp"
#include "cvt_color_internal.h"

namespace cv { namespace gpu { namespace cudev
{
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_x = 8 };
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
    {
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
    {
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
    };

    #define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
        void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) \
        { \
            traits::functor_type functor = traits::create_functor(); \
            typedef typename traits::functor_type::argument_type src_t; \
            typedef typename traits::functor_type::result_type dst_t; \
            cv::gpu::cudev::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
        }

    #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits)

    #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits<ushort>) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)

    #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)

    #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(name) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits<uchar>) \
        OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits<float>)
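
    // For reference, a sketch of what one of the instantiations below expands
    // to, written out by hand from the macros above (illustrative only):
    //
    //   OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
    //
    // becomes
    //
    //   void bgr_to_bgr555(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    //   {
    //       bgr_to_bgr555_traits::functor_type functor = bgr_to_bgr555_traits::create_functor();
    //       typedef typename bgr_to_bgr555_traits::functor_type::argument_type src_t;
    //       typedef typename bgr_to_bgr555_traits::functor_type::result_type dst_t;
    //       cv::gpu::cudev::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream);
    //   }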

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hsv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hsv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hsv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hsv4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hls4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hls4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hls4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hls4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_lab4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_lab4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_lab4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_lab4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_lab4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_lab4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_lab)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_lab4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_lab4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lrgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lrgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lrgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lrgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lbgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lbgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lbgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lbgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_luv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_luv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_luv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_luv4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_luv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_luv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_luv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_luv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_luv4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lrgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lrgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lrgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lrgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lbgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lbgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lbgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lbgra)

    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL
}}} // namespace cv { namespace gpu { namespace cudev

#endif /* CUDA_DISABLER */
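
// The instantiations above all emit host wrappers with the uniform signature
// void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream). A hedged
// usage sketch (buffer names are illustrative; the host-side cvtColor wrapper
// normally selects among these through a dispatch table):
//
//   cv::gpu::cudev::bgr_to_gray_8u(src, dst, stream);      // CV_8UC3 BGR -> gray
//   cv::gpu::cudev::rgb_to_hsv_full_32f(src, dst, stream); // CV_32FC3 RGB -> HSV, full hue range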
@@ -1,544 +0,0 @@
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/color.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"

namespace cv { namespace gpu { namespace cudev
{
    template <typename T> struct Bayer2BGR;

    template <> struct Bayer2BGR<uchar>
    {
        uchar3 res0;
        uchar3 res1;
        uchar3 res2;
        uchar3 res3;

        __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
        {
            uchar4 patch[3][3];
            patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
            patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
            patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];

            patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
            patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
            patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];

            patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
            patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
            patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];

            if ((s_y & 1) ^ start_with_green)
            {
                const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
                const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1;

                const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2;
                const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2;

                const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1;
                const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1;

                const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2;
                const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2;

                if ((s_y & 1) ^ blue_last)
                {
                    res0.x = t1;
                    res0.y = patch[1][1].x;
                    res0.z = t0;

                    res1.x = patch[1][1].y;
                    res1.y = t3;
                    res1.z = t2;

                    res2.x = t5;
                    res2.y = patch[1][1].z;
                    res2.z = t4;

                    res3.x = patch[1][1].w;
                    res3.y = t7;
                    res3.z = t6;
                }
                else
                {
                    res0.x = t0;
                    res0.y = patch[1][1].x;
                    res0.z = t1;

                    res1.x = t2;
                    res1.y = t3;
                    res1.z = patch[1][1].y;

                    res2.x = t4;
                    res2.y = patch[1][1].z;
                    res2.z = t5;

                    res3.x = t6;
                    res3.y = t7;
                    res3.z = patch[1][1].w;
                }
            }
            else
            {
                const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2;
                const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2;

                const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
                const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1;

                const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2;
                const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2;

                const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1;
                const int t7 = (patch[1][1].z + patch[1][2].x + 1) >> 1;

                if ((s_y & 1) ^ blue_last)
                {
                    res0.x = patch[1][1].x;
                    res0.y = t1;
                    res0.z = t0;

                    res1.x = t3;
                    res1.y = patch[1][1].y;
                    res1.z = t2;

                    res2.x = patch[1][1].z;
                    res2.y = t5;
                    res2.z = t4;

                    res3.x = t7;
                    res3.y = patch[1][1].w;
                    res3.z = t6;
                }
                else
                {
                    res0.x = t0;
                    res0.y = t1;
                    res0.z = patch[1][1].x;

                    res1.x = t2;
                    res1.y = patch[1][1].y;
                    res1.z = t3;

                    res2.x = t4;
                    res2.y = t5;
                    res2.z = patch[1][1].z;

                    res3.x = t6;
                    res3.y = patch[1][1].w;
                    res3.z = t7;
                }
            }
        }
    };

    template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
    template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
    {
        typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
        return f(pix);
    }
    template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
    {
        return pix;
    }
    template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
    {
        return make_uchar4(pix.x, pix.y, pix.z, 255);
    }

    template <typename D>
    __global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
    {
        const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
        int s_y = blockIdx.y * blockDim.y + threadIdx.y;

        if (s_y >= src.rows || (s_x << 2) >= src.cols)
            return;

        s_y = ::min(::max(s_y, 1), src.rows - 2);

        Bayer2BGR<uchar> bayer;
        bayer.apply(src, s_x, s_y, blue_last, start_with_green);

        const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
        const int d_y = blockIdx.y * blockDim.y + threadIdx.y;

        dst(d_y, d_x) = toDst<D>(bayer.res0);
        if (d_x + 1 < src.cols)
            dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
        if (d_x + 2 < src.cols)
            dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
        if (d_x + 3 < src.cols)
            dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
    }

    template <> struct Bayer2BGR<ushort>
    {
        ushort3 res0;
        ushort3 res1;

        __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
        {
            ushort2 patch[3][3];
            patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
            patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
            patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];

            patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
            patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
            patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];

            patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
            patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
            patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];

            if ((s_y & 1) ^ start_with_green)
            {
                const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
                const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1;

                const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2;
                const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2;

                if ((s_y & 1) ^ blue_last)
                {
                    res0.x = t1;
                    res0.y = patch[1][1].x;
                    res0.z = t0;

                    res1.x = patch[1][1].y;
                    res1.y = t3;
                    res1.z = t2;
                }
                else
                {
                    res0.x = t0;
                    res0.y = patch[1][1].x;
                    res0.z = t1;

                    res1.x = t2;
                    res1.y = t3;
                    res1.z = patch[1][1].y;
                }
            }
            else
            {
                const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2;
                const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2;

                const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
                const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1;

                if ((s_y & 1) ^ blue_last)
                {
                    res0.x = patch[1][1].x;
                    res0.y = t1;
                    res0.z = t0;

                    res1.x = t3;
                    res1.y = patch[1][1].y;
                    res1.z = t2;
                }
                else
                {
                    res0.x = t0;
                    res0.y = t1;
                    res0.z = patch[1][1].x;

                    res1.x = t2;
                    res1.y = patch[1][1].y;
                    res1.z = t3;
                }
            }
        }
    };

    template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
    template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
    {
        typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
        return f(pix);
    }
    template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
    {
        return pix;
    }
    template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
    {
        return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
    }

    template <typename D>
    __global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
    {
        const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
        int s_y = blockIdx.y * blockDim.y + threadIdx.y;

        if (s_y >= src.rows || (s_x << 1) >= src.cols)
            return;

        s_y = ::min(::max(s_y, 1), src.rows - 2);

        Bayer2BGR<ushort> bayer;
        bayer.apply(src, s_x, s_y, blue_last, start_with_green);

        const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
        const int d_y = blockIdx.y * blockDim.y + threadIdx.y;

        dst(d_y, d_x) = toDst<D>(bayer.res0);
        if (d_x + 1 < src.cols)
            dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
    }

    template <int cn>
    void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
    {
        typedef typename TypeVec<uchar, cn>::vec_type dst_t;

        const dim3 block(32, 8);
        const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));

        cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );

        Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template <int cn>
    void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
    {
        typedef typename TypeVec<ushort, cn>::vec_type dst_t;

        const dim3 block(32, 8);
        const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));

        cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );

        Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);

    template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
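
    // The two flags passed to these wrappers encode the 2x2 Bayer pattern. A
    // hedged sketch of the host-side dispatch (the real wrapper lives in the
    // host module; constant names follow the OpenCV Bayer conversion codes):
    //
    //   const bool blue_last        = (code == CV_BayerBG2BGR) || (code == CV_BayerGB2BGR);
    //   const bool start_with_green = (code == CV_BayerGB2BGR) || (code == CV_BayerGR2BGR);
    //
    //   if (depth == CV_8U)
    //       Bayer2BGR_8u_gpu<3>(src, dst, blue_last, start_with_green, stream);
    //   else
    //       Bayer2BGR_16u_gpu<3>(src, dst, blue_last, start_with_green, stream);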

    //////////////////////////////////////////////////////////////
    // Bayer Demosaicing (Malvar, He, and Cutler)
    //
    // by Morgan McGuire, Williams College
    // http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders
    //
    // ported to CUDA

    texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp);

    template <typename DstType>
    __global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed)
    {
        const float kAx = -1.0f / 8.0f, kAy = -1.5f / 8.0f, kAz = 0.5f / 8.0f /*kAw = -1.0f / 8.0f*/;
        const float kBx = 2.0f / 8.0f, /*kBy = 0.0f / 8.0f,*/ /*kBz = 0.0f / 8.0f,*/ kBw = 4.0f / 8.0f;
        const float kCx = 4.0f / 8.0f, kCy = 6.0f / 8.0f, kCz = 5.0f / 8.0f /*kCw = 5.0f / 8.0f*/;
        const float /*kDx = 0.0f / 8.0f,*/ kDy = 2.0f / 8.0f, kDz = -1.0f / 8.0f /*kDw = -1.0f / 8.0f*/;
        const float kEx = -1.0f / 8.0f, kEy = -1.5f / 8.0f, /*kEz = -1.0f / 8.0f,*/ kEw = 0.5f / 8.0f;
        const float kFx = 2.0f / 8.0f, /*kFy = 0.0f / 8.0f,*/ kFz = 4.0f / 8.0f /*kFw = 0.0f / 8.0f*/;

        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1)
            return;

        int2 center;
        center.x = x + sourceOffset.x;
        center.y = y + sourceOffset.y;

        int4 xCoord;
        xCoord.x = center.x - 2;
        xCoord.y = center.x - 1;
        xCoord.z = center.x + 1;
        xCoord.w = center.x + 2;

        int4 yCoord;
        yCoord.x = center.y - 2;
        yCoord.y = center.y - 1;
        yCoord.z = center.y + 1;
        yCoord.w = center.y + 2;

        float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0)

        float4 Dvec;
        Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1)
        Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1)
        Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1)
        Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1)

        float4 value;
        value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0
        value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0
        value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0
        value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0

        // (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1)
        value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1
        value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1
        value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1
        value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1

        float4 PATTERN;
        PATTERN.x = kCx * C;
        PATTERN.y = kCy * C;
        PATTERN.z = kCz * C;
        PATTERN.w = PATTERN.z;

        float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w;

        // There are five filter patterns (identity, cross, checker,
        // theta, phi). Precompute the terms from all of them and then
        // use swizzles to assign to color channels.
        //
        // Channel Matches
        //   x      cross   (e.g., EE G)
        //   y      checker (e.g., EE B)
        //   z      theta   (e.g., EO R)
        //   w      phi     (e.g., EO B)

        #define A value.x // A0 + A1
        #define B value.y // B0 + B1
        #define E value.z // E0 + E1
        #define F value.w // F0 + F1

        float3 temp;

        // PATTERN.yzw += (kD.yz * D).xyy;
        temp.x = kDy * D;
        temp.y = kDz * D;
        PATTERN.y += temp.x;
        PATTERN.z += temp.y;
        PATTERN.w += temp.y;

        // PATTERN += (kA.xyz * A).xyzx;
        temp.x = kAx * A;
        temp.y = kAy * A;
        temp.z = kAz * A;
        PATTERN.x += temp.x;
        PATTERN.y += temp.y;
        PATTERN.z += temp.z;
        PATTERN.w += temp.x;

        // PATTERN += (kE.xyw * E).xyxz;
        temp.x = kEx * E;
        temp.y = kEy * E;
        temp.z = kEw * E;
        PATTERN.x += temp.x;
        PATTERN.y += temp.y;
        PATTERN.z += temp.x;
        PATTERN.w += temp.z;

        // PATTERN.xw += kB.xw * B;
        PATTERN.x += kBx * B;
        PATTERN.w += kBw * B;

        // PATTERN.xz += kF.xz * F;
        PATTERN.x += kFx * F;
        PATTERN.z += kFz * F;

        // Determine which of four types of pixels we are on.
        int2 alternate;
        alternate.x = (x + firstRed.x) % 2;
        alternate.y = (y + firstRed.y) % 2;

        // in BGR sequence
        uchar3 pixelColor =
            (alternate.y == 0) ?
                ((alternate.x == 0) ?
                    make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
                    make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
                ((alternate.x == 0) ?
                    make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
                    make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));

        dst(y, x) = toDst<DstType>(pixelColor);
    }

    template <int cn>
    void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
    {
        typedef typename TypeVec<uchar, cn>::vec_type dst_t;

        const dim3 block(32, 8);
        const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

        bindTexture(&sourceTex, src);

        MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
    template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
    template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
}}}

#endif /* CUDA_DISABLER */
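
// Hedged usage sketch for the Malvar-He-Cutler path above: sourceOffset shifts
// the texture fetch so ROI views sample the right Bayer phase, and firstRed is
// the (x, y) position of the first red sample in the 2x2 tile. Values below
// are illustrative:
//
//   const int2 firstRed = make_int2(1, 0);  // e.g. a GRBG mosaic
//   const int2 offset   = make_int2(0, 0);  // full-image view
//   MHCdemosaic<3>(src, offset, dst, firstRed, stream); // 8-bit, 3-channel BGR output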
@@ -1,143 +0,0 @@
#if !defined CUDA_DISABLER

#include <thrust/device_ptr.h>
#include <thrust/sort.h>

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace gfft
    {
        texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);

        __device__ int g_counter = 0;

        template <class Mask> __global__ void findCorners(float threshold, const Mask mask, float2* corners, int max_count, int rows, int cols)
        {
            const int j = blockIdx.x * blockDim.x + threadIdx.x;
            const int i = blockIdx.y * blockDim.y + threadIdx.y;

            if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask(i, j))
            {
                float val = tex2D(eigTex, j, i);

                if (val > threshold)
                {
                    float maxVal = val;

                    maxVal = ::fmax(tex2D(eigTex, j - 1, i - 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j    , i - 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j + 1, i - 1), maxVal);

                    maxVal = ::fmax(tex2D(eigTex, j - 1, i), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j + 1, i), maxVal);

                    maxVal = ::fmax(tex2D(eigTex, j - 1, i + 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j    , i + 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j + 1, i + 1), maxVal);

                    if (val == maxVal)
                    {
                        const int ind = ::atomicAdd(&g_counter, 1);

                        if (ind < max_count)
                            corners[ind] = make_float2(j, i);
                    }
                }
            }
        }

        int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
        {
            void* counter_ptr;
            cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );

            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

            bindTexture(&eigTex, eig);

            dim3 block(16, 16);
            dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));

            if (mask.data)
                findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
            else
                findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);

            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );

            int count;
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

            return std::min(count, max_count);
        }

        class EigGreater
        {
        public:
            __device__ __forceinline__ bool operator()(float2 a, float2 b) const
            {
                return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
            }
        };

        void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
        {
            bindTexture(&eigTex, eig);

            thrust::device_ptr<float2> ptr(corners);

            thrust::sort(ptr, ptr + count, EigGreater());
        }
    } // namespace gfft
}}}

#endif /* CUDA_DISABLER */
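
// Hedged sketch of the good-features-to-track flow built from the two entry
// points above; eig is the min-eigenvalue response image and the threshold is
// normally derived as maxVal * qualityLevel (buffer names are illustrative):
//
//   int total = gfft::findCorners_gpu(eig, static_cast<float>(maxVal * qualityLevel),
//                                     mask, corners_ptr, maxCorners);
//   if (sortCorners)
//       gfft::sortCorners_gpu(eig, corners_ptr, total); // strongest responses first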
@@ -1,153 +0,0 @@
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/transform.hpp"

using namespace cv::gpu;
using namespace cv::gpu::cudev;

namespace hist
{
    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
    {
        __shared__ int shist[256];

        const int y = blockIdx.x * blockDim.y + threadIdx.y;
        const int tid = threadIdx.y * blockDim.x + threadIdx.x;

        shist[tid] = 0;
        __syncthreads();

        if (y < rows)
        {
            const unsigned int* rowPtr = (const unsigned int*) (src + y * step);

            const int cols_4 = cols / 4;
            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
            {
                unsigned int data = rowPtr[x];

                Emulation::smem::atomicAdd(&shist[(data >>  0) & 0xFFU], 1);
                Emulation::smem::atomicAdd(&shist[(data >>  8) & 0xFFU], 1);
                Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
                Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
            }

            if (cols % 4 != 0 && threadIdx.x == 0)
            {
                for (int x = cols_4 * 4; x < cols; ++x)
                {
                    unsigned int data = ((const uchar*)rowPtr)[x];
                    Emulation::smem::atomicAdd(&shist[data], 1);
                }
            }
        }

        __syncthreads();

        const int histVal = shist[tid];
        if (histVal > 0)
            ::atomicAdd(hist + tid, histVal);
    }

    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(src.rows, block.y));

        histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}

/////////////////////////////////////////////////////////////////////////

namespace hist
{
    __constant__ int c_lut[256];

    struct EqualizeHist : unary_function<uchar, uchar>
    {
        float scale;

        __host__ EqualizeHist(float _scale) : scale(_scale) {}

        __device__ __forceinline__ uchar operator ()(uchar val) const
        {
            const int lut = c_lut[val];
            return __float2int_rn(scale * lut);
        }
    };
}

namespace cv { namespace gpu { namespace cudev
{
    template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
    {
        enum { smart_shift = 4 };
    };
}}}

namespace hist
{
    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
    {
        if (stream == 0)
            cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
        else
            cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );

        const float scale = 255.0f / (src.cols * src.rows);

        cudev::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream);
    }
}

#endif /* CUDA_DISABLER */
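
// Hedged sketch of how the two halves combine for histogram equalization:
// histogram256 builds the 256-bin histogram, the host wrapper turns it into a
// cumulative LUT (elided here), and equalizeHist applies that LUT through the
// transform framework. Buffer names are illustrative:
//
//   hist::histogram256(src, d_hist, stream);     // d_hist: device int[256]
//   // ... prefix-sum d_hist into d_lut (device int[256]) ...
//   hist::equalizeHist(src, dst, d_lut, stream); // scales by 255 / (cols * rows)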
File diff suppressed because it is too large
@@ -1,754 +0,0 @@
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        /////////////////////////////////// MeanShiftFiltering ///////////////////////////////////////////////

        texture<uchar4, 2> tex_meanshift;
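
        // do_mean_shift: one mean-shift convergence chain for the pixel at (x0, y0).
        // Starting from the current center, it averages the coordinates and colors of
        // all pixels in the (2*sp+1)^2 spatial window whose squared color distance is
        // within sr*sr, then moves the center to that mean. It stops after maxIter
        // iterations or once the combined positional and color shift falls within eps,
        // writes the converged color to `out`, and returns the converged position.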

        __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
                                        size_t out_step, int cols, int rows,
                                        int sp, int sr, int maxIter, float eps)
        {
            int isr2 = sr*sr;
            uchar4 c = tex2D(tex_meanshift, x0, y0);

            // iterate meanshift procedure
            for( int iter = 0; iter < maxIter; iter++ )
            {
                int count = 0;
                int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
                float icount;

                //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
                int minx = x0-sp;
                int miny = y0-sp;
                int maxx = x0+sp;
                int maxy = y0+sp;

                for( int y = miny; y <= maxy; y++)
                {
                    int rowCount = 0;
                    for( int x = minx; x <= maxx; x++ )
                    {
                        uchar4 t = tex2D( tex_meanshift, x, y );

                        int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
                        if( norm2 <= isr2 )
                        {
                            s0 += t.x; s1 += t.y; s2 += t.z;
                            sx += x; rowCount++;
                        }
                    }
                    count += rowCount;
                    sy += y*rowCount;
                }

                if( count == 0 )
                    break;

                icount = 1.f/count;
                int x1 = __float2int_rz(sx*icount);
                int y1 = __float2int_rz(sy*icount);
                s0 = __float2int_rz(s0*icount);
                s1 = __float2int_rz(s1*icount);
                s2 = __float2int_rz(s2*icount);

                int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);

                bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps);

                x0 = x1; y0 = y1;
                c.x = s0; c.y = s1; c.z = s2;

                if( stopFlag )
                    break;
            }

            int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar);
            *(uchar4*)(out + base) = c;

            return make_short2((short)x0, (short)y0);
        }

        __global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
        {
            int x0 = blockIdx.x * blockDim.x + threadIdx.x;
            int y0 = blockIdx.y * blockDim.y + threadIdx.y;

            if( x0 < cols && y0 < rows )
                do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
        }

        __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
                                             unsigned char* outsp, size_t outspstep,
                                             int cols, int rows,
                                             int sp, int sr, int maxIter, float eps)
        {
            int x0 = blockIdx.x * blockDim.x + threadIdx.x;
            int y0 = blockIdx.y * blockDim.y + threadIdx.y;

            if( x0 < cols && y0 < rows )
            {
                int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
                *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
            }
        }

        void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
        {
            dim3 grid(1, 1, 1);
            dim3 threads(32, 8, 1);
            grid.x = divUp(src.cols, threads.x);
            grid.y = divUp(src.rows, threads.y);

            cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
            cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );

            meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );

            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
        }

        void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
        {
            dim3 grid(1, 1, 1);
            dim3 threads(32, 8, 1);
            grid.x = divUp(src.cols, threads.x);
            grid.y = divUp(src.rows, threads.y);

            cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
            cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );

            meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );

            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
        }

        /////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
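
        // cvtPixel maps a disparity d in [0, ndisp) to a color: the hue is spread over
        // 0..240 degrees (large disparities toward red, small toward blue), converted
        // to RGB via the standard HSV sextant scheme, then packed as 8-bit ARGB.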

        template <typename T>
        __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
        {
            unsigned int H = ((ndisp-d) * 240)/ndisp;

            unsigned int hi = (H/60) % 6;
            float f = H/60.f - H/60;
            float p = V * (1 - S);
            float q = V * (1 - f * S);
            float t = V * (1 - (1 - f) * S);

            float3 res;

            if (hi == 0) //R = V, G = t, B = p
            {
                res.x = p;
                res.y = t;
                res.z = V;
            }

            if (hi == 1) // R = q, G = V, B = p
            {
                res.x = p;
                res.y = V;
                res.z = q;
            }

            if (hi == 2) // R = p, G = V, B = t
            {
                res.x = t;
                res.y = V;
                res.z = p;
            }

            if (hi == 3) // R = p, G = q, B = V
            {
                res.x = V;
                res.y = q;
                res.z = p;
            }

            if (hi == 4) // R = t, G = p, B = V
            {
                res.x = V;
                res.y = p;
                res.z = t;
            }

            if (hi == 5) // R = V, G = p, B = q
            {
                res.x = q;
                res.y = p;
                res.z = V;
            }

            const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f);
            const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f);
            const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
            const unsigned int a = 255U;

            return (a << 24) + (r << 16) + (g << 8) + b;
        }

        __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
        {
            const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if(x < width && y < height)
            {
                uchar4 d4 = *(uchar4*)(disp + y * disp_step + x);

                uint4 res;
                res.x = cvtPixel(d4.x, ndisp);
                res.y = cvtPixel(d4.y, ndisp);
                res.z = cvtPixel(d4.z, ndisp);
                res.w = cvtPixel(d4.w, ndisp);

                uint4* line = (uint4*)(out_image + y * out_step);
                line[x >> 2] = res;
            }
        }

        __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
        {
            const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if(x < width && y < height)
            {
                short2 d2 = *(short2*)(disp + y * disp_step + x);

                uint2 res;
                res.x = cvtPixel(d2.x, ndisp);
                res.y = cvtPixel(d2.y, ndisp);

                uint2* line = (uint2*)(out_image + y * out_step);
                line[x >> 1] = res;
            }
        }

        void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
        {
            dim3 threads(16, 16, 1);
            dim3 grid(1, 1, 1);
            grid.x = divUp(src.cols, threads.x << 2);
            grid.y = divUp(src.rows, threads.y);

            drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
        {
            dim3 threads(32, 8, 1);
            dim3 grid(1, 1, 1);
            grid.x = divUp(src.cols, threads.x << 1);
            grid.y = divUp(src.rows, threads.y);

            drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////

        __constant__ float cq[16];
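
        // cq holds the 4x4 disparity-to-depth matrix Q (typically produced by stereo
        // rectification) in row-major order. Each pixel (x, y) with disparity d is
        // lifted to homogeneous coordinates Q * (x, y, d, 1)^T and de-homogenized:
        //   X = (q00*x + q01*y + q02*d + q03) / W, and likewise for Y and Z, with
        //   W =  q30*x + q31*y + q32*d + q33.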

        template <typename T, typename D>
        __global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y >= disp.rows || x >= disp.cols)
                return;

            const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3];
            const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7];
            const float qz = x * cq[ 8] + y * cq[ 9] + cq[11];
            const float qw = x * cq[12] + y * cq[13] + cq[15];

            const T d = disp(y, x);

            const float iW = 1.f / (qw + cq[14] * d);

            D v = VecTraits<D>::all(1.0f);
            v.x = (qx + cq[2] * d) * iW;
            v.y = (qy + cq[6] * d) * iW;
            v.z = (qz + cq[10] * d) * iW;

            xyz(y, x) = v;
        }

        template <typename T, typename D>
        void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));

            cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );

            reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
        template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
        template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
        template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);

        /////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////

        texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
        texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
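
        // Harris response per pixel: accumulate the structure tensor
        //   M = [ a b ; b c ],  a = sum dx*dx, b = sum dx*dy, c = sum dy*dy
        // over a block_size x block_size window of the Sobel derivatives Dx, Dy, then
        //   R = det(M) - k * trace(M)^2 = a*c - b*b - k*(a + c)^2.
        // The untemplated kernel relies on the textures' clamp addressing for the
        // replicate-border case; the templated one remaps indices via border functors.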

        __global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                float a = 0.f;
                float b = 0.f;
                float c = 0.f;

                const int ibegin = y - (block_size / 2);
                const int jbegin = x - (block_size / 2);
                const int iend = ibegin + block_size;
                const int jend = jbegin + block_size;

                for (int i = ibegin; i < iend; ++i)
                {
                    for (int j = jbegin; j < jend; ++j)
                    {
                        float dx = tex2D(harrisDxTex, j, i);
                        float dy = tex2D(harrisDyTex, j, i);

                        a += dx * dx;
                        b += dx * dy;
                        c += dy * dy;
                    }
                }

                dst(y, x) = a * c - b * b - k * (a + c) * (a + c);
            }
        }

        template <typename BR, typename BC>
        __global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst, const BR border_row, const BC border_col)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                float a = 0.f;
                float b = 0.f;
                float c = 0.f;

                const int ibegin = y - (block_size / 2);
                const int jbegin = x - (block_size / 2);
                const int iend = ibegin + block_size;
                const int jend = jbegin + block_size;

                for (int i = ibegin; i < iend; ++i)
                {
                    const int y = border_col.idx_row(i);

                    for (int j = jbegin; j < jend; ++j)
                    {
                        const int x = border_row.idx_col(j);

                        float dx = tex2D(harrisDxTex, x, y);
                        float dy = tex2D(harrisDyTex, x, y);

                        a += dx * dx;
                        b += dx * dy;
                        c += dy * dy;
                    }
                }

                dst(y, x) = a * c - b * b - k * (a + c) * (a + c);
            }
        }

        void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));

            bindTexture(&harrisDxTex, Dx);
            bindTexture(&harrisDyTex, Dy);

            switch (border_type)
            {
            case BORDER_REFLECT101_GPU:
                cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
                break;

            case BORDER_REFLECT_GPU:
                cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
                break;

            case BORDER_REPLICATE_GPU:
                cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst);
                break;
            }

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        /////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////

        texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
        texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
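
        // Minimum eigenvalue of the same 2x2 structure tensor M = [ a b ; b c ]:
        //   lambda_min = (a + c)/2 - sqrt( ((a - c)/2)^2 + b^2 ),
        // which is what the kernels below compute after pre-halving a and c.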

        __global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                float a = 0.f;
                float b = 0.f;
                float c = 0.f;

                const int ibegin = y - (block_size / 2);
                const int jbegin = x - (block_size / 2);
                const int iend = ibegin + block_size;
                const int jend = jbegin + block_size;

                for (int i = ibegin; i < iend; ++i)
                {
                    for (int j = jbegin; j < jend; ++j)
                    {
                        float dx = tex2D(minEigenValDxTex, j, i);
                        float dy = tex2D(minEigenValDyTex, j, i);

                        a += dx * dx;
                        b += dx * dy;
                        c += dy * dy;
                    }
                }

                a *= 0.5f;
                c *= 0.5f;

                dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
            }
        }

        template <typename BR, typename BC>
        __global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst, const BR border_row, const BC border_col)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                float a = 0.f;
                float b = 0.f;
                float c = 0.f;

                const int ibegin = y - (block_size / 2);
                const int jbegin = x - (block_size / 2);
                const int iend = ibegin + block_size;
                const int jend = jbegin + block_size;

                for (int i = ibegin; i < iend; ++i)
                {
                    int y = border_col.idx_row(i);

                    for (int j = jbegin; j < jend; ++j)
                    {
                        int x = border_row.idx_col(j);

                        float dx = tex2D(minEigenValDxTex, x, y);
                        float dy = tex2D(minEigenValDyTex, x, y);

                        a += dx * dx;
                        b += dx * dy;
                        c += dy * dy;
                    }
                }

                a *= 0.5f;
                c *= 0.5f;

                dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
            }
        }

        void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));

            bindTexture(&minEigenValDxTex, Dx);
            bindTexture(&minEigenValDyTex, Dy);

            switch (border_type)
            {
            case BORDER_REFLECT101_GPU:
                cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
                break;

            case BORDER_REFLECT_GPU:
                cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
                break;

            case BORDER_REPLICATE_GPU:
                cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst);
                break;
            }

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        //////////////////////////////////////////////////////////////////////////
        // buildWarpMaps

        // TODO use intrinsics like __sinf and so on

        namespace build_warp_maps
        {
            __constant__ float ck_rinv[9];
            __constant__ float cr_kinv[9];
            __constant__ float ct[3];
            __constant__ float cscale;
        }
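
        // The mappers below implement backward warping: for each destination pixel
        // (u, v) they recover a ray in the source camera frame via the precomputed
        // inverse rotation/intrinsics products stored in constant memory above, then
        // project it back to source image coordinates (x, y). buildWarpMapsKernel
        // writes these coordinates into remap-style map_x/map_y tables.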

        class PlaneMapper
        {
        public:
            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
            {
                using namespace build_warp_maps;

                float x_ = u / cscale - ct[0];
                float y_ = v / cscale - ct[1];

                float z;
                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);

                x /= z;
                y /= z;
            }
        };

        class CylindricalMapper
        {
        public:
            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
            {
                using namespace build_warp_maps;

                u /= cscale;
                float x_ = ::sinf(u);
                float y_ = v / cscale;
                float z_ = ::cosf(u);

                float z;
                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;

                if (z > 0) { x /= z; y /= z; }
                else x = y = -1;
            }
        };

        class SphericalMapper
        {
        public:
            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
            {
                using namespace build_warp_maps;

                v /= cscale;
                u /= cscale;

                float sinv = ::sinf(v);
                float x_ = sinv * ::sinf(u);
                float y_ = -::cosf(v);
                float z_ = sinv * ::cosf(u);

                float z;
                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;

                if (z > 0) { x /= z; y /= z; }
                else x = y = -1;
            }
        };

        template <typename Mapper>
        __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
                                            PtrStepf map_x, PtrStepf map_y)
        {
            int du = blockIdx.x * blockDim.x + threadIdx.x;
            int dv = blockIdx.y * blockDim.y + threadIdx.y;
            if (du < cols && dv < rows)
            {
                float u = tl_u + du;
                float v = tl_v + dv;
                float x, y;
                Mapper::mapBackward(u, v, x, y);
                map_x.ptr(dv)[du] = x;
                map_y.ptr(dv)[du] = y;
            }
        }

        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                const float k_rinv[9], const float r_kinv[9], const float t[3],
                                float scale, cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

            int cols = map_x.cols;
            int rows = map_x.rows;

            dim3 threads(32, 8);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            buildWarpMapsKernel<PlaneMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }

        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                      const float k_rinv[9], const float r_kinv[9], float scale,
                                      cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

            int cols = map_x.cols;
            int rows = map_x.rows;

            dim3 threads(32, 8);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            buildWarpMapsKernel<CylindricalMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }

        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                    const float k_rinv[9], const float r_kinv[9], float scale,
                                    cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

            int cols = map_x.cols;
            int rows = map_x.rows;

            dim3 threads(32, 8);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            buildWarpMapsKernel<SphericalMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace cudev {

#endif /* CUDA_DISABLER */
@@ -1,916 +0,0 @@
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_math.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace match_template
    {
        __device__ __forceinline__ float sum(float v) { return v; }
        __device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
        __device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
        __device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }

        __device__ __forceinline__ float first(float v) { return v; }
        __device__ __forceinline__ float first(float2 v) { return v.x; }
        __device__ __forceinline__ float first(float3 v) { return v.x; }
        __device__ __forceinline__ float first(float4 v) { return v.x; }

        __device__ __forceinline__ float mul(float a, float b) { return a * b; }
        __device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
        __device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
        __device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }

        __device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; }
        __device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
        __device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
        __device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }

        __device__ __forceinline__ float sub(float a, float b) { return a - b; }
        __device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
        __device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
        __device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }

        __device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; }
        __device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
        __device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
        __device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }

        //////////////////////////////////////////////////////////////////////
        // Naive_CCORR
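
        // Each thread owns one result pixel (x, y) and directly accumulates
        // sum_{i,j} I(y+i, x+j) * T(i, j) over the whole w x h template window
        // (per channel, collapsed with sum() at the end), i.e. plain
        // cross-correlation at O(w*h) work per output pixel.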

        template <typename T, int cn>
        __global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result)
        {
            typedef typename TypeVec<T, cn>::vec_type Type;
            typedef typename TypeVec<float, cn>::vec_type Typef;

            int x = blockDim.x * blockIdx.x + threadIdx.x;
            int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                Typef res = VecTraits<Typef>::all(0);

                for (int i = 0; i < h; ++i)
                {
                    const Type* image_ptr = (const Type*)image.ptr(y + i);
                    const Type* templ_ptr = (const Type*)templ.ptr(i);
                    for (int j = 0; j < w; ++j)
                        res = res + mul(image_ptr[x + j], templ_ptr[j]);
                }

                result.ptr(y)[x] = sum(res);
            }
        }

        template <typename T, int cn>
        void matchTemplateNaive_CCORR(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream)
        {
            const dim3 threads(32, 8);
            const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplateNaiveKernel_CCORR<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
        {
            typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);

            static const caller_t callers[] =
            {
                0, matchTemplateNaive_CCORR<float, 1>, matchTemplateNaive_CCORR<float, 2>, matchTemplateNaive_CCORR<float, 3>, matchTemplateNaive_CCORR<float, 4>
            };

            callers[cn](image, templ, result, stream);
        }

        void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
        {
            typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);

            static const caller_t callers[] =
            {
                0, matchTemplateNaive_CCORR<uchar, 1>, matchTemplateNaive_CCORR<uchar, 2>, matchTemplateNaive_CCORR<uchar, 3>, matchTemplateNaive_CCORR<uchar, 4>
            };

            callers[cn](image, templ, result, stream);
        }

        //////////////////////////////////////////////////////////////////////
        // Naive_SQDIFF

        template <typename T, int cn>
        __global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result)
        {
            typedef typename TypeVec<T, cn>::vec_type Type;
            typedef typename TypeVec<float, cn>::vec_type Typef;

            int x = blockDim.x * blockIdx.x + threadIdx.x;
            int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                Typef res = VecTraits<Typef>::all(0);
                Typef delta;

                for (int i = 0; i < h; ++i)
                {
                    const Type* image_ptr = (const Type*)image.ptr(y + i);
                    const Type* templ_ptr = (const Type*)templ.ptr(i);
                    for (int j = 0; j < w; ++j)
                    {
                        delta = sub(image_ptr[x + j], templ_ptr[j]);
                        res = res + delta * delta;
                    }
                }

                result.ptr(y)[x] = sum(res);
            }
        }

        template <typename T, int cn>
        void matchTemplateNaive_SQDIFF(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream)
        {
            const dim3 threads(32, 8);
            const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplateNaiveKernel_SQDIFF<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
        {
            typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);

            static const caller_t callers[] =
            {
                0, matchTemplateNaive_SQDIFF<float, 1>, matchTemplateNaive_SQDIFF<float, 2>, matchTemplateNaive_SQDIFF<float, 3>, matchTemplateNaive_SQDIFF<float, 4>
            };

            callers[cn](image, templ, result, stream);
        }

        void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
        {
            typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);

            static const caller_t callers[] =
            {
                0, matchTemplateNaive_SQDIFF<uchar, 1>, matchTemplateNaive_SQDIFF<uchar, 2>, matchTemplateNaive_SQDIFF<uchar, 3>, matchTemplateNaive_SQDIFF<uchar, 4>
            };

            callers[cn](image, templ, result, stream);
        }

        //////////////////////////////////////////////////////////////////////
        // Prepared_SQDIFF
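
        // "Prepared" SQDIFF expands sum (I - T)^2 = sum I^2 - 2*CCORR(I, T) + sum T^2.
        // `result` arrives already holding CCORR(I, T) (computed beforehand),
        // `image_sqsum` is the integral image of squared pixel values (so any window
        // sum costs four lookups), and templ_sqsum is a scalar, giving O(1) work per
        // output pixel instead of O(w*h).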

        template <int cn>
        __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sqsum_ = (float)(
                        (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
                        (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
                float ccorr = result.ptr(y)[x];
                result.ptr(y)[x] = image_sqsum_ - 2.f * ccorr + templ_sqsum;
            }
        }

        template <int cn>
        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream)
        {
            const dim3 threads(32, 8);
            const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplatePreparedKernel_SQDIFF_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
                                             cudaStream_t stream)
        {
            typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);

            static const caller_t callers[] =
            {
                0, matchTemplatePrepared_SQDIFF_8U<1>, matchTemplatePrepared_SQDIFF_8U<2>, matchTemplatePrepared_SQDIFF_8U<3>, matchTemplatePrepared_SQDIFF_8U<4>
            };

            callers[cn](w, h, image_sqsum, templ_sqsum, result, stream);
        }

        //////////////////////////////////////////////////////////////////////
        // Prepared_SQDIFF_NORMED

        // normAcc* are accurate normalization routines which make GPU matchTemplate
        // consistent with CPU one
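        // (when |num| slightly exceeds the denominator due to accumulated rounding,
        // the ratio is clamped to +/-1 within a 12.5% tolerance rather than returned
        // unnormalized; beyond that the window is treated as degenerate and the
        // method's neutral value is returned: 0 for CCORR/CCOEFF, 1 for SQDIFF).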

        __device__ float normAcc(float num, float denum)
        {
            if (::fabs(num) < denum)
                return num / denum;
            if (::fabs(num) < denum * 1.125f)
                return num > 0 ? 1 : -1;
            return 0;
        }

        __device__ float normAcc_SQDIFF(float num, float denum)
        {
            if (::fabs(num) < denum)
                return num / denum;
            if (::fabs(num) < denum * 1.125f)
                return num > 0 ? 1 : -1;
            return 1;
        }

        template <int cn>
        __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
                int w, int h, const PtrStep<unsigned long long> image_sqsum,
                unsigned long long templ_sqsum, PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sqsum_ = (float)(
                        (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
                        (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
                float ccorr = result.ptr(y)[x];
                result.ptr(y)[x] = normAcc_SQDIFF(image_sqsum_ - 2.f * ccorr + templ_sqsum,
                                                  sqrtf(image_sqsum_ * templ_sqsum));
            }
        }

        template <int cn>
        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
                                                    PtrStepSzf result, cudaStream_t stream)
        {
            const dim3 threads(32, 8);
            const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplatePreparedKernel_SQDIFF_NORMED_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
                                                    PtrStepSzf result, int cn, cudaStream_t stream)
        {
            typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
            static const caller_t callers[] =
            {
                0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
            };

            callers[cn](w, h, image_sqsum, templ_sqsum, result, stream);
        }

        //////////////////////////////////////////////////////////////////////
        // Prepared_CCOFF
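
        // CCOEFF is CCORR with the means removed: since
        //   sum (I - mean(I)) * (T - mean(T)) = CCORR(I, T) - sum(I) * mean(T),
        // each kernel only needs the window sum of the image (four integral-image
        // lookups per channel) and the precomputed per-channel template means,
        // passed in as templ_sum_scale_*.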

        __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sum_ = (float)(
                        (image_sum.ptr(y + h)[x + w] - image_sum.ptr(y)[x + w]) -
                        (image_sum.ptr(y + h)[x] - image_sum.ptr(y)[x]));
                float ccorr = result.ptr(y)[x];
                result.ptr(y)[x] = ccorr - image_sum_ * templ_sum_scale;
            }
        }

        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplatePreparedKernel_CCOFF_8U<<<grid, threads, 0, stream>>>(w, h, (float)templ_sum / (w * h), image_sum, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        __global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
                int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
                const PtrStep<unsigned int> image_sum_r,
                const PtrStep<unsigned int> image_sum_g,
                PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sum_r_ = (float)(
                        (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
                        (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
                float image_sum_g_ = (float)(
                        (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
                        (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
                float ccorr = result.ptr(y)[x];
                result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
                                         - image_sum_g_ * templ_sum_scale_g;
            }
        }

        void matchTemplatePrepared_CCOFF_8UC2(
                int w, int h,
                const PtrStepSz<unsigned int> image_sum_r,
                const PtrStepSz<unsigned int> image_sum_g,
                unsigned int templ_sum_r, unsigned int templ_sum_g,
                PtrStepSzf result, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplatePreparedKernel_CCOFF_8UC2<<<grid, threads, 0, stream>>>(
                    w, h, (float)templ_sum_r / (w * h), (float)templ_sum_g / (w * h),
                    image_sum_r, image_sum_g, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        __global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
                int w, int h,
                float templ_sum_scale_r,
                float templ_sum_scale_g,
                float templ_sum_scale_b,
                const PtrStep<unsigned int> image_sum_r,
                const PtrStep<unsigned int> image_sum_g,
                const PtrStep<unsigned int> image_sum_b,
                PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sum_r_ = (float)(
                        (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
                        (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
                float image_sum_g_ = (float)(
                        (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
                        (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
                float image_sum_b_ = (float)(
                        (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
                        (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
                float ccorr = result.ptr(y)[x];
                result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
                                         - image_sum_g_ * templ_sum_scale_g
                                         - image_sum_b_ * templ_sum_scale_b;
            }
        }

        void matchTemplatePrepared_CCOFF_8UC3(
                int w, int h,
                const PtrStepSz<unsigned int> image_sum_r,
                const PtrStepSz<unsigned int> image_sum_g,
                const PtrStepSz<unsigned int> image_sum_b,
                unsigned int templ_sum_r,
                unsigned int templ_sum_g,
                unsigned int templ_sum_b,
                PtrStepSzf result, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplatePreparedKernel_CCOFF_8UC3<<<grid, threads, 0, stream>>>(
                    w, h,
                    (float)templ_sum_r / (w * h),
                    (float)templ_sum_g / (w * h),
                    (float)templ_sum_b / (w * h),
                    image_sum_r, image_sum_g, image_sum_b, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        __global__ void matchTemplatePreparedKernel_CCOFF_8UC4(
                int w, int h,
                float templ_sum_scale_r,
                float templ_sum_scale_g,
                float templ_sum_scale_b,
                float templ_sum_scale_a,
                const PtrStep<unsigned int> image_sum_r,
                const PtrStep<unsigned int> image_sum_g,
                const PtrStep<unsigned int> image_sum_b,
                const PtrStep<unsigned int> image_sum_a,
                PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sum_r_ = (float)(
                        (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
                        (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
                float image_sum_g_ = (float)(
                        (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
                        (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
                float image_sum_b_ = (float)(
                        (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
                        (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
                float image_sum_a_ = (float)(
                        (image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) -
                        (image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x]));
                float ccorr = result.ptr(y)[x];
                result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
                                         - image_sum_g_ * templ_sum_scale_g
                                         - image_sum_b_ * templ_sum_scale_b
                                         - image_sum_a_ * templ_sum_scale_a;
            }
        }

        void matchTemplatePrepared_CCOFF_8UC4(
                int w, int h,
                const PtrStepSz<unsigned int> image_sum_r,
                const PtrStepSz<unsigned int> image_sum_g,
                const PtrStepSz<unsigned int> image_sum_b,
                const PtrStepSz<unsigned int> image_sum_a,
                unsigned int templ_sum_r,
                unsigned int templ_sum_g,
                unsigned int templ_sum_b,
                unsigned int templ_sum_a,
                PtrStepSzf result, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            matchTemplatePreparedKernel_CCOFF_8UC4<<<grid, threads, 0, stream>>>(
                    w, h,
                    (float)templ_sum_r / (w * h),
                    (float)templ_sum_g / (w * h),
                    (float)templ_sum_b / (w * h),
                    (float)templ_sum_a / (w * h),
                    image_sum_r, image_sum_g, image_sum_b, image_sum_a,
                    result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        //////////////////////////////////////////////////////////////////////
        // Prepared_CCOFF_NORMED
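
        // The normalized variant divides the CCOEFF numerator by the product of the
        // template and window standard-deviation terms. templ_sqsum_scale already
        // holds sum T^2 - (sum T)^2 / (w*h); the matching window term
        // sum I^2 - (sum I)^2 / (w*h) is assembled per pixel from the sum and
        // square-sum integral images, and normAcc() guards the division.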
|
||||
|
||||
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
|
||||
int w, int h, float weight,
|
||||
float templ_sum_scale, float templ_sqsum_scale,
|
||||
const PtrStep<unsigned int> image_sum,
|
||||
const PtrStep<unsigned long long> image_sqsum,
|
||||
PtrStepSzf result)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x < result.cols && y < result.rows)
|
||||
{
|
||||
float ccorr = result.ptr(y)[x];
|
||||
float image_sum_ = (float)(
|
||||
(image_sum.ptr(y + h)[x + w] - image_sum.ptr(y)[x + w]) -
|
||||
(image_sum.ptr(y + h)[x] - image_sum.ptr(y)[x]));
|
||||
float image_sqsum_ = (float)(
|
||||
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
|
||||
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
|
||||
result.ptr(y)[x] = normAcc(ccorr - image_sum_ * templ_sum_scale,
|
||||
sqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_)));
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||
int w, int h, const PtrStepSz<unsigned int> image_sum,
|
||||
const PtrStepSz<unsigned long long> image_sqsum,
|
||||
unsigned int templ_sum, unsigned long long templ_sqsum,
|
||||
PtrStepSzf result, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8);
|
||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||
|
||||
float weight = 1.f / (w * h);
|
||||
float templ_sum_scale = templ_sum * weight;
|
||||
float templ_sqsum_scale = templ_sqsum - weight * templ_sum * templ_sum;
|
||||
|
||||
matchTemplatePreparedKernel_CCOFF_NORMED_8U<<<grid, threads, 0, stream>>>(
|
||||
w, h, weight, templ_sum_scale, templ_sqsum_scale,
|
||||
image_sum, image_sqsum, result);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2(
|
||||
int w, int h, float weight,
|
||||
float templ_sum_scale_r, float templ_sum_scale_g,
|
||||
float templ_sqsum_scale,
|
||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
||||
PtrStepSzf result)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x < result.cols && y < result.rows)
|
||||
{
|
||||
float image_sum_r_ = (float)(
|
||||
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
|
||||
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
|
||||
float image_sqsum_r_ = (float)(
|
||||
(image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) -
|
||||
(image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x]));
|
||||
float image_sum_g_ = (float)(
|
||||
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
|
||||
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
|
||||
float image_sqsum_g_ = (float)(
|
||||
(image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
|
||||
(image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
|
||||
|
||||
float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r
|
||||
- image_sum_g_ * templ_sum_scale_g;
|
||||
float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
|
||||
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_));
|
||||
result.ptr(y)[x] = normAcc(num, denum);
|
||||
}
|
||||
}
|
||||
|
||||
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
||||
int w, int h,
|
||||
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||
PtrStepSzf result, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8);
|
||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||
|
||||
float weight = 1.f / (w * h);
|
||||
float templ_sum_scale_r = templ_sum_r * weight;
|
||||
float templ_sum_scale_g = templ_sum_g * weight;
|
||||
float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
|
||||
+ templ_sqsum_g - weight * templ_sum_g * templ_sum_g;
|
||||
|
||||
matchTemplatePreparedKernel_CCOFF_NORMED_8UC2<<<grid, threads, 0, stream>>>(
|
||||
w, h, weight,
|
||||
templ_sum_scale_r, templ_sum_scale_g,
|
||||
templ_sqsum_scale,
|
||||
image_sum_r, image_sqsum_r,
|
||||
image_sum_g, image_sqsum_g,
|
||||
result);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
|
||||
int w, int h, float weight,
|
||||
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
|
||||
float templ_sqsum_scale,
|
||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
||||
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
||||
PtrStepSzf result)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x < result.cols && y < result.rows)
|
||||
{
|
||||
float image_sum_r_ = (float)(
|
||||
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
|
||||
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
|
||||
float image_sqsum_r_ = (float)(
|
||||
(image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) -
|
||||
(image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x]));
|
||||
float image_sum_g_ = (float)(
|
||||
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
|
||||
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
|
||||
float image_sqsum_g_ = (float)(
|
||||
(image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
|
||||
(image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
|
||||
float image_sum_b_ = (float)(
|
||||
(image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
|
||||
(image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
|
||||
float image_sqsum_b_ = (float)(
|
||||
(image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) -
|
||||
(image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x]));
|
||||
|
||||
float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r
|
||||
- image_sum_g_ * templ_sum_scale_g
|
||||
- image_sum_b_ * templ_sum_scale_b;
|
||||
float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
|
||||
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_
|
||||
+ image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_));
|
||||
result.ptr(y)[x] = normAcc(num, denum);
|
||||
}
|
||||
}
|

        void matchTemplatePrepared_CCOFF_NORMED_8UC3(
                int w, int h,
                const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
                const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
                const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
                unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
                unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
                unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
                PtrStepSzf result, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            float weight = 1.f / (w * h);
            float templ_sum_scale_r = templ_sum_r * weight;
            float templ_sum_scale_g = templ_sum_g * weight;
            float templ_sum_scale_b = templ_sum_b * weight;
            float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
                                    + templ_sqsum_g - weight * templ_sum_g * templ_sum_g
                                    + templ_sqsum_b - weight * templ_sum_b * templ_sum_b;

            matchTemplatePreparedKernel_CCOFF_NORMED_8UC3<<<grid, threads, 0, stream>>>(
                    w, h, weight,
                    templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b,
                    templ_sqsum_scale,
                    image_sum_r, image_sqsum_r,
                    image_sum_g, image_sqsum_g,
                    image_sum_b, image_sqsum_b,
                    result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
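        // Editor's note: the templ_* statistics depend only on the template, so they are folded
        // into four scalars on the host once per call; stream 0 is treated as the legacy default
        // stream, hence the blocking cudaDeviceSynchronize() in that case.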


        __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
                int w, int h, float weight,
                float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
                float templ_sum_scale_a, float templ_sqsum_scale,
                const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
                const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
                const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
                const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
                PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sum_r_ = (float)(
                        (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
                        (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
                float image_sqsum_r_ = (float)(
                        (image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) -
                        (image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x]));
                float image_sum_g_ = (float)(
                        (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
                        (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
                float image_sqsum_g_ = (float)(
                        (image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
                        (image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
                float image_sum_b_ = (float)(
                        (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
                        (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
                float image_sqsum_b_ = (float)(
                        (image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) -
                        (image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x]));
                float image_sum_a_ = (float)(
                        (image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) -
                        (image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x]));
                float image_sqsum_a_ = (float)(
                        (image_sqsum_a.ptr(y + h)[x + w] - image_sqsum_a.ptr(y)[x + w]) -
                        (image_sqsum_a.ptr(y + h)[x] - image_sqsum_a.ptr(y)[x]));

                float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - image_sum_g_ * templ_sum_scale_g
                                             - image_sum_b_ * templ_sum_scale_b - image_sum_a_ * templ_sum_scale_a;
                float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
                                                       + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_
                                                       + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_
                                                       + image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_));
                result.ptr(y)[x] = normAcc(num, denum);
            }
        }

        void matchTemplatePrepared_CCOFF_NORMED_8UC4(
                int w, int h,
                const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
                const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
                const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
                const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
                unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
                unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
                unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
                unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
                PtrStepSzf result, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            float weight = 1.f / (w * h);
            float templ_sum_scale_r = templ_sum_r * weight;
            float templ_sum_scale_g = templ_sum_g * weight;
            float templ_sum_scale_b = templ_sum_b * weight;
            float templ_sum_scale_a = templ_sum_a * weight;
            float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
                                    + templ_sqsum_g - weight * templ_sum_g * templ_sum_g
                                    + templ_sqsum_b - weight * templ_sum_b * templ_sum_b
                                    + templ_sqsum_a - weight * templ_sum_a * templ_sum_a;

            matchTemplatePreparedKernel_CCOFF_NORMED_8UC4<<<grid, threads, 0, stream>>>(
                    w, h, weight,
                    templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b, templ_sum_scale_a,
                    templ_sqsum_scale,
                    image_sum_r, image_sqsum_r,
                    image_sum_g, image_sqsum_g,
                    image_sum_b, image_sqsum_b,
                    image_sum_a, image_sqsum_a,
                    result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        //////////////////////////////////////////////////////////////////////
        // normalize

        template <int cn>
        __global__ void normalizeKernel_8U(
                int w, int h, const PtrStep<unsigned long long> image_sqsum,
                unsigned long long templ_sqsum, PtrStepSzf result)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                float image_sqsum_ = (float)(
                        (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
                        (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
                result.ptr(y)[x] = normAcc(result.ptr(y)[x], sqrtf(image_sqsum_ * templ_sqsum));
            }
        }

        void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
                          unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            switch (cn)
            {
            case 1:
                normalizeKernel_8U<1><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
                break;
            case 2:
                normalizeKernel_8U<2><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
                break;
            case 3:
                normalizeKernel_8U<3><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
                break;
            case 4:
                normalizeKernel_8U<4><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
                break;
            }

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
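        // Editor's note (hedged reading): the switch hoists the runtime channel count into the
        // <int cn> template argument, so the [(x + w) * cn] indexing is resolved at compile time.
        // Because the squared integral image appears to be built over the flattened interleaved
        // row, differencing it at channel-0 column boundaries yields the energy of all cn
        // channels of the pixels in between in a single subtraction.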

        //////////////////////////////////////////////////////////////////////
        // extractFirstChannel

        template <int cn>
        __global__ void extractFirstChannel_32F(const PtrStepb image, PtrStepSzf result)
        {
            typedef typename TypeVec<float, cn>::vec_type Typef;

            int x = blockDim.x * blockIdx.x + threadIdx.x;
            int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < result.cols && y < result.rows)
            {
                Typef val = ((const Typef*)image.ptr(y))[x];
                result.ptr(y)[x] = first(val);
            }
        }

        void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream)
        {
            dim3 threads(32, 8);
            dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

            switch (cn)
            {
            case 1:
                extractFirstChannel_32F<1><<<grid, threads, 0, stream>>>(image, result);
                break;
            case 2:
                extractFirstChannel_32F<2><<<grid, threads, 0, stream>>>(image, result);
                break;
            case 3:
                extractFirstChannel_32F<3><<<grid, threads, 0, stream>>>(image, result);
                break;
            case 4:
                extractFirstChannel_32F<4><<<grid, threads, 0, stream>>>(image, result);
                break;
            }
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    } // namespace match_template
}}} // namespace cv { namespace gpu { namespace cudev


#endif /* CUDA_DISABLER */
@@ -1,569 +0,0 @@

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"

using namespace cv::gpu;

typedef unsigned char uchar;
typedef unsigned short ushort;

//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denoising

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        __device__ __forceinline__ float norm2(const float& v) { return v*v; }
        __device__ __forceinline__ float norm2(const float2& v) { return v.x*v.x + v.y*v.y; }
        __device__ __forceinline__ float norm2(const float3& v) { return v.x*v.x + v.y*v.y + v.z*v.z; }
        __device__ __forceinline__ float norm2(const float4& v) { return v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w; }

        template<typename T, typename B>
        __global__ void nlm_kernel(const PtrStep<T> src, PtrStepSz<T> dst, const B b, int search_radius, int block_radius, float noise_mult)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;

            const int i = blockDim.y * blockIdx.y + threadIdx.y;
            const int j = blockDim.x * blockIdx.x + threadIdx.x;

            if (j >= dst.cols || i >= dst.rows)
                return;

            int bsize = search_radius + block_radius;
            int search_window = 2 * search_radius + 1;
            float minus_search_window2_inv = -1.f/(search_window * search_window);

            value_type sum1 = VecTraits<value_type>::all(0);
            float sum2 = 0.f;

            if (j - bsize >= 0 && j + bsize < dst.cols && i - bsize >= 0 && i + bsize < dst.rows)
            {
                for(float y = -search_radius; y <= search_radius; ++y)
                    for(float x = -search_radius; x <= search_radius; ++x)
                    {
                        float dist2 = 0;
                        for(float ty = -block_radius; ty <= block_radius; ++ty)
                            for(float tx = -block_radius; tx <= block_radius; ++tx)
                            {
                                value_type bv = saturate_cast<value_type>(src(i + y + ty, j + x + tx));
                                value_type av = saturate_cast<value_type>(src(i + ty, j + tx));

                                dist2 += norm2(av - bv);
                            }

                        float w = __expf(dist2 * noise_mult + (x * x + y * y) * minus_search_window2_inv);

                        /*if (i == 255 && j == 255)
                            printf("%f %f\n", w, dist2 * minus_h2_inv + (x * x + y * y) * minus_search_window2_inv);*/

                        sum1 = sum1 + w * saturate_cast<value_type>(src(i + y, j + x));
                        sum2 += w;
                    }
            }
            else
            {
                for(float y = -search_radius; y <= search_radius; ++y)
                    for(float x = -search_radius; x <= search_radius; ++x)
                    {
                        float dist2 = 0;
                        for(float ty = -block_radius; ty <= block_radius; ++ty)
                            for(float tx = -block_radius; tx <= block_radius; ++tx)
                            {
                                value_type bv = saturate_cast<value_type>(b.at(i + y + ty, j + x + tx, src));
                                value_type av = saturate_cast<value_type>(b.at(i + ty, j + tx, src));
                                dist2 += norm2(av - bv);
                            }

                        float w = __expf(dist2 * noise_mult + (x * x + y * y) * minus_search_window2_inv);

                        sum1 = sum1 + w * saturate_cast<value_type>(b.at(i + y, j + x, src));
                        sum2 += w;
                    }
            }

            dst(i, j) = saturate_cast<T>(sum1 / sum2);
        }
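        // Editor's note (hedged): the weight computed above is the classic NLM kernel
        //     w = exp(-||P(p) - P(q)||^2 / (h^2 * cn * B^2)) * exp(-(dx^2 + dy^2) / S^2),
        // where B is the block (patch) window, S the search window, and noise_mult carries the
        // first exponent's scale (set up in nlm_caller below); dst is then the weight-normalized
        // average sum1 / sum2 over the search window.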

        template<typename T, template <typename> class B>
        void nlm_caller(const PtrStepSzb src, PtrStepSzb dst, int search_radius, int block_radius, float h, cudaStream_t stream)
        {
            dim3 block (32, 8);
            dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));

            B<T> b(src.rows, src.cols);

            int block_window = 2 * block_radius + 1;
            float minus_h2_inv = -1.f/(h * h * VecTraits<T>::cn);
            float noise_mult = minus_h2_inv/(block_window * block_window);

            cudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
            nlm_kernel<<<grid, block, 0, stream>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, search_radius, block_radius, noise_mult);
            cudaSafeCall ( cudaGetLastError () );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
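        // Editor's note: nlm_kernel uses no shared memory, so cudaFuncCachePreferL1 trades the
        // unused shared-memory partition for a larger L1 cache -- a sensible setting for this
        // read-heavy sweep (rationale inferred; the original leaves it uncommented).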

        template<typename T>
        void nlm_bruteforce_gpu(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream)
        {
            typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb dst, int search_radius, int block_radius, float h, cudaStream_t stream);

            static func_t funcs[] =
            {
                nlm_caller<T, BrdReflect101>,
                nlm_caller<T, BrdReplicate>,
                nlm_caller<T, BrdConstant>,
                nlm_caller<T, BrdReflect>,
                nlm_caller<T, BrdWrap>,
            };
            funcs[borderMode](src, dst, search_radius, block_radius, h, stream);
        }

        template void nlm_bruteforce_gpu<uchar>(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t);
        template void nlm_bruteforce_gpu<uchar2>(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t);
        template void nlm_bruteforce_gpu<uchar3>(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t);
    }
}}}

//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denoising (fast approximate version)

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {

        template <int cn> struct Unroll;
        template <> struct Unroll<1>
        {
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*> smem_tuple(float* smem)
            {
                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&> tie(float& val1, float& val2)
            {
                return thrust::tie(val1, val2);
            }

            static __device__ __forceinline__ const thrust::tuple<plus<float>, plus<float> > op()
            {
                plus<float> op;
                return thrust::make_tuple(op, op);
            }
        };
        template <> struct Unroll<2>
        {
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
            {
                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&, float&> tie(float& val1, float2& val2)
            {
                return thrust::tie(val1, val2.x, val2.y);
            }

            static __device__ __forceinline__ const thrust::tuple<plus<float>, plus<float>, plus<float> > op()
            {
                plus<float> op;
                return thrust::make_tuple(op, op, op);
            }
        };
        template <> struct Unroll<3>
        {
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
            {
                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&, float&, float&> tie(float& val1, float3& val2)
            {
                return thrust::tie(val1, val2.x, val2.y, val2.z);
            }

            static __device__ __forceinline__ const thrust::tuple<plus<float>, plus<float>, plus<float>, plus<float> > op()
            {
                plus<float> op;
                return thrust::make_tuple(op, op, op, op);
            }
        };
        template <> struct Unroll<4>
        {
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
            {
                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE, smem + 4 * BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&, float&, float&, float&> tie(float& val1, float4& val2)
            {
                return thrust::tie(val1, val2.x, val2.y, val2.z, val2.w);
            }

            static __device__ __forceinline__ const thrust::tuple<plus<float>, plus<float>, plus<float>, plus<float>, plus<float> > op()
            {
                plus<float> op;
                return thrust::make_tuple(op, op, op, op, op);
            }
        };
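        // Editor's note: Unroll<cn> maps a channel count onto matching thrust tuples of shared
        // memory slices, value references, and plus<float> functors, so the single
        // reduce<CTA_SIZE>(...) call in convolve_window below folds the weight total and all
        // channel sums in one block-wide pass instead of cn + 1 separate reductions.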

        __device__ __forceinline__ int calcDist(const uchar& a, const uchar& b) { return (a-b)*(a-b); }
        __device__ __forceinline__ int calcDist(const uchar2& a, const uchar2& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y); }
        __device__ __forceinline__ int calcDist(const uchar3& a, const uchar3& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y) + (a.z-b.z)*(a.z-b.z); }

        template <class T> struct FastNonLocalMenas
        {
            enum
            {
                CTA_SIZE = 128,

                TILE_COLS = 128,
                TILE_ROWS = 32,

                STRIDE = CTA_SIZE
            };

            struct plus
            {
                __device__ __forceinline__ float operator()(float v1, float v2) const { return v1 + v2; }
            };

            int search_radius;
            int block_radius;

            int search_window;
            int block_window;
            float minus_h2_inv;

            FastNonLocalMenas(int search_window_, int block_window_, float h) : search_radius(search_window_/2), block_radius(block_window_/2),
                search_window(search_window_), block_window(block_window_), minus_h2_inv(-1.f/(h * h * VecTraits<T>::cn)) {}

            PtrStep<T> src;
            mutable PtrStepi buffer;

            __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
            {
                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    dist_sums[index] = 0;

                    for(int tx = 0; tx < block_window; ++tx)
                        col_sums(tx, index) = 0;

                    int y = index / search_window;
                    int x = index - y * search_window;

                    int ay = i;
                    int ax = j;

                    int by = i + y - search_radius;
                    int bx = j + x - search_radius;

#if 1
                    for (int tx = -block_radius; tx <= block_radius; ++tx)
                    {
                        int col_sum = 0;
                        for (int ty = -block_radius; ty <= block_radius; ++ty)
                        {
                            int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx));

                            dist_sums[index] += dist;
                            col_sum += dist;
                        }
                        col_sums(tx + block_radius, index) = col_sum;
                    }
#else
                    for (int ty = -block_radius; ty <= block_radius; ++ty)
                        for (int tx = -block_radius; tx <= block_radius; ++tx)
                        {
                            int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx));

                            dist_sums[index] += dist;
                            col_sums(tx + block_radius, index) += dist;
                        }
#endif

                    up_col_sums(j, index) = col_sums(block_window - 1, index);
                }
            }
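            // Editor's note: each thread owns a strided subset of the search_window^2 candidate
            // patches; col_sums keeps the last block_window per-column distance sums and
            // up_col_sums caches one column sum per image x, seeding the O(1) sliding-window
            // updates performed by the two shiftRight_* methods below.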

            __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
            {
                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

                    int ay = i;
                    int ax = j + block_radius;

                    int by = i + y - search_radius;
                    int bx = j + x - search_radius + block_radius;

                    int col_sum = 0;

                    for (int ty = -block_radius; ty <= block_radius; ++ty)
                        col_sum += calcDist(src(ay + ty, ax), src(by + ty, bx));

                    dist_sums[index] += col_sum - col_sums(first, index);

                    col_sums(first, index) = col_sum;
                    up_col_sums(j, index) = col_sum;
                }
            }

            __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
            {
                int ay = i;
                int ax = j + block_radius;

                T a_up = src(ay - block_radius - 1, ax);
                T a_down = src(ay + block_radius, ax);

                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

                    int by = i + y - search_radius;
                    int bx = j + x - search_radius + block_radius;

                    T b_up = src(by - block_radius - 1, bx);
                    T b_down = src(by + block_radius, bx);

                    int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up);

                    dist_sums[index] += col_sum - col_sums(first, index);
                    col_sums(first, index) = col_sum;
                    up_col_sums(j, index) = col_sum;
                }
            }
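            // Editor's note (hedged reading): stepping one pixel right evicts the oldest column
            // (ring index 'first') and adds a fresh one; on every row after the first, that new
            // column is itself updated in O(1) from the row above as
            //     col_sum = up_col_sums + calcDist(bottom pair) - calcDist(top pair),
            // giving constant amortized work per pixel and candidate instead of O(block_window^2).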

            __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums, T& dst) const
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_type;

                float weights_sum = 0;
                sum_type sum = VecTraits<sum_type>::all(0);

                float bw2_inv = 1.f/(block_window * block_window);

                int sx = j - search_radius;
                int sy = i - search_radius;

                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

                    float avg_dist = dist_sums[index] * bw2_inv;
                    float weight = __expf(avg_dist * minus_h2_inv);
                    weights_sum += weight;

                    sum = sum + weight * saturate_cast<sum_type>(src(sy + y, sx + x));
                }

                __shared__ float cta_buffer[CTA_SIZE * (VecTraits<T>::cn + 1)];

                reduce<CTA_SIZE>(Unroll<VecTraits<T>::cn>::template smem_tuple<CTA_SIZE>(cta_buffer),
                                 Unroll<VecTraits<T>::cn>::tie(weights_sum, sum),
                                 threadIdx.x,
                                 Unroll<VecTraits<T>::cn>::op());

                if (threadIdx.x == 0)
                    dst = saturate_cast<T>(sum / weights_sum);
            }

            __device__ __forceinline__ void operator()(PtrStepSz<T>& dst) const
            {
                int tbx = blockIdx.x * TILE_COLS;
                int tby = blockIdx.y * TILE_ROWS;

                int tex = ::min(tbx + TILE_COLS, dst.cols);
                int tey = ::min(tby + TILE_ROWS, dst.rows);

                PtrStepi col_sums;
                col_sums.data = buffer.ptr(dst.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window;
                col_sums.step = buffer.step;

                PtrStepi up_col_sums;
                up_col_sums.data = buffer.data + blockIdx.y * search_window * search_window;
                up_col_sums.step = buffer.step;

                extern __shared__ int dist_sums[]; //search_window * search_window

                int first = 0;

                for (int i = tby; i < tey; ++i)
                    for (int j = tbx; j < tex; ++j)
                    {
                        __syncthreads();

                        if (j == tbx)
                        {
                            initSums_BruteForce(i, j, dist_sums, col_sums, up_col_sums);
                            first = 0;
                        }
                        else
                        {
                            if (i == tby)
                                shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums);
                            else
                                shiftRight_UpSums(i, j, first, dist_sums, col_sums, up_col_sums);

                            first = (first + 1) % block_window;
                        }

                        __syncthreads();

                        convolve_window(i, j, dist_sums, col_sums, up_col_sums, dst(i, j));
                    }
            }
        };

        template<typename T>
        __global__ void fast_nlm_kernel(const FastNonLocalMenas<T> fnlm, PtrStepSz<T> dst) { fnlm(dst); }

        void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows)
        {
            typedef FastNonLocalMenas<uchar> FNLM;
            dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));

            buffer_cols = search_window * search_window * grid.y;
            buffer_rows = src.cols + block_window * grid.x;
        }
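        // Editor's note (layout inferred from the pointer arithmetic in operator() above): the
        // scratch buffer's first src.cols rows hold up_col_sums, indexed by image column, with
        // one search_window^2-wide stripe per block row; the remaining rows hold each block
        // column's block_window-deep ring of col_sums.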

        template<typename T>
        void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer,
                          int search_window, int block_window, float h, cudaStream_t stream)
        {
            typedef FastNonLocalMenas<T> FNLM;
            FNLM fnlm(search_window, block_window, h);

            fnlm.src = (PtrStepSz<T>)src;
            fnlm.buffer = buffer;

            dim3 block(FNLM::CTA_SIZE, 1);
            dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));
            int smem = search_window * search_window * sizeof(int);

            fast_nlm_kernel<<<grid, block, smem, stream>>>(fnlm, (PtrStepSz<T>)dst);
            cudaSafeCall ( cudaGetLastError () );
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
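        // Editor's note: the third launch argument sizes the dynamic shared memory backing
        // 'extern __shared__ int dist_sums[]' in operator(): one int per candidate position in
        // the search window.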

        template void nlm_fast_gpu<uchar>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
        template void nlm_fast_gpu<uchar2>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
        template void nlm_fast_gpu<uchar3>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);


        __global__ void fnlm_split_kernel(const PtrStepSz<uchar3> lab, PtrStepb l, PtrStep<uchar2> ab)
        {
            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if (x < lab.cols && y < lab.rows)
            {
                uchar3 p = lab(y, x);
                ab(y, x) = make_uchar2(p.y, p.z);
                l(y, x) = p.x;
            }
        }

        void fnlm_split_channels(const PtrStepSz<uchar3>& lab, PtrStepb l, PtrStep<uchar2> ab, cudaStream_t stream)
        {
            dim3 b(32, 8);
            dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y));

            fnlm_split_kernel<<<g, b, 0, stream>>>(lab, l, ab);
            cudaSafeCall ( cudaGetLastError () );
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        __global__ void fnlm_merge_kernel(const PtrStepb l, const PtrStep<uchar2> ab, PtrStepSz<uchar3> lab)
        {
            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if (x < lab.cols && y < lab.rows)
            {
                uchar2 p = ab(y, x);
                lab(y, x) = make_uchar3(l(y, x), p.x, p.y);
            }
        }

        void fnlm_merge_channels(const PtrStepb& l, const PtrStep<uchar2>& ab, PtrStepSz<uchar3> lab, cudaStream_t stream)
        {
            dim3 b(32, 8);
            dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y));

            fnlm_merge_kernel<<<g, b, 0, stream>>>(l, ab, lab);
            cudaSafeCall ( cudaGetLastError () );
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
}}}


#endif /* CUDA_DISABLER */
@@ -1,228 +0,0 @@

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t;

            __shared__ work_t smem[256 + 4];

            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y;

            const int src_y = 2 * y;

            if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2)
            {
                {
                    work_t sum;

                    sum = 0.0625f * src(src_y - 2, x);
                    sum = sum + 0.25f * src(src_y - 1, x);
                    sum = sum + 0.375f * src(src_y, x);
                    sum = sum + 0.25f * src(src_y + 1, x);
                    sum = sum + 0.0625f * src(src_y + 2, x);

                    smem[2 + threadIdx.x] = sum;
                }

                if (threadIdx.x < 2)
                {
                    const int left_x = x - 2;

                    work_t sum;

                    sum = 0.0625f * src(src_y - 2, left_x);
                    sum = sum + 0.25f * src(src_y - 1, left_x);
                    sum = sum + 0.375f * src(src_y, left_x);
                    sum = sum + 0.25f * src(src_y + 1, left_x);
                    sum = sum + 0.0625f * src(src_y + 2, left_x);

                    smem[threadIdx.x] = sum;
                }

                if (threadIdx.x > 253)
                {
                    const int right_x = x + 2;

                    work_t sum;

                    sum = 0.0625f * src(src_y - 2, right_x);
                    sum = sum + 0.25f * src(src_y - 1, right_x);
                    sum = sum + 0.375f * src(src_y, right_x);
                    sum = sum + 0.25f * src(src_y + 1, right_x);
                    sum = sum + 0.0625f * src(src_y + 2, right_x);

                    smem[4 + threadIdx.x] = sum;
                }
            }
            else
            {
                {
                    work_t sum;

                    sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
                    sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
                    sum = sum + 0.375f * src(src_y, b.idx_col_high(x));
                    sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));

                    smem[2 + threadIdx.x] = sum;
                }

                if (threadIdx.x < 2)
                {
                    const int left_x = x - 2;

                    work_t sum;

                    sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
                    sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
                    sum = sum + 0.375f * src(src_y, b.idx_col(left_x));
                    sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));

                    smem[threadIdx.x] = sum;
                }

                if (threadIdx.x > 253)
                {
                    const int right_x = x + 2;

                    work_t sum;

                    sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
                    sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
                    sum = sum + 0.375f * src(src_y, b.idx_col_high(right_x));
                    sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));

                    smem[4 + threadIdx.x] = sum;
                }
            }

            __syncthreads();

            if (threadIdx.x < 128)
            {
                const int tid2 = threadIdx.x * 2;

                work_t sum;

                sum = 0.0625f * smem[2 + tid2 - 2];
                sum = sum + 0.25f * smem[2 + tid2 - 1];
                sum = sum + 0.375f * smem[2 + tid2];
                sum = sum + 0.25f * smem[2 + tid2 + 1];
                sum = sum + 0.0625f * smem[2 + tid2 + 2];

                const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;

                if (dst_x < dst_cols)
                    dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
            }
        }
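        // Editor's note: the weights 0.0625 / 0.25 / 0.375 are the 5-tap binomial kernel
        // [1 4 6 4 1] / 16. Each block runs the vertical pass into smem with a two-pixel apron
        // on either side, then the horizontal pass on even columns only, emitting one
        // half-resolution row -- the standard separable pyramid-down scheme.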

        template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
        {
            const dim3 block(256);
            const dim3 grid(divUp(src.cols, block.x), dst.rows);

            B<T> b(src.rows, src.cols);

            pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace cudev


#endif /* CUDA_DISABLER */
@@ -1,196 +0,0 @@

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            __shared__ sum_t s_srcPatch[10][10];
            __shared__ sum_t s_dstPatch[20][16];

            if (threadIdx.x < 10 && threadIdx.y < 10)
            {
                int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
                int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;

                srcx = ::abs(srcx);
                srcx = ::min(src.cols - 1, srcx);

                srcy = ::abs(srcy);
                srcy = ::min(src.rows - 1, srcy);

                s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
            }

            __syncthreads();

            sum_t sum = VecTraits<sum_t>::all(0);

            const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
            const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0);
            const bool eveny = ((threadIdx.y & 1) == 0);
            const int tidx = threadIdx.x;

            if (eveny)
            {
                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
                sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx    ) >> 1)];
                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
            }

            s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;

            if (threadIdx.y < 2)
            {
                sum = VecTraits<sum_t>::all(0);

                if (eveny)
                {
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
                    sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx    ) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
                }

                s_dstPatch[threadIdx.y][threadIdx.x] = sum;
            }

            if (threadIdx.y > 13)
            {
                sum = VecTraits<sum_t>::all(0);

                if (eveny)
                {
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
                    sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx    ) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
                }

                s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
            }

            __syncthreads();

            sum = VecTraits<sum_t>::all(0);

            const int tidy = threadIdx.y;

            sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
            sum = sum + 0.25f   * s_dstPatch[2 + tidy - 1][threadIdx.x];
            sum = sum + 0.375f  * s_dstPatch[2 + tidy    ][threadIdx.x];
            sum = sum + 0.25f   * s_dstPatch[2 + tidy + 1][threadIdx.x];
            sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];

            if (x < dst.cols && y < dst.rows)
                dst(y, x) = saturate_cast<T>(4.0f * sum);
        }
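        // Editor's note (hedged): evenFlag/oddFlag implement zero-insertion upsampling without
        // branches -- even output columns take the even taps (1/16, 6/16, 1/16) and odd columns
        // the odd taps (4/16) of the same 5-tap kernel -- and the final 4.0f restores the energy
        // lost to the inserted zeros, as pyrUp requires.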

        template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
        {
            const dim3 block(16, 16);
            const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

            pyrUp<<<grid, block, 0, stream>>>(src, dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace cudev

#endif /* CUDA_DISABLER */
@@ -1,274 +0,0 @@

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float xcoo = mapx.ptr(y)[x];
                const float ycoo = mapy.ptr(y)[x];

                dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
            }
        }
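        // Editor's note: the kernel itself is just dst(y, x) = src(mapy(y, x), mapx(y, x));
        // all interpolation and border handling live in the Ptr2D functor, which the
        // dispatchers below compose from a Filter (point / linear / cubic) wrapped around a
        // BorderReader.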

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
        {
            static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
|
||||
texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||
struct tex_remap_ ## type ## _reader \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
|
||||
PtrStepSz< type > dst, const float* borderValue, bool cc20) \
|
||||
{ \
|
||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||
dim3 block(32, cc20 ? 8 : 4); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_remap_ ## type , srcWhole); \
|
||||
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
|
||||
PtrStepSz< type > dst, const float*, bool) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_remap_ ## type , srcWhole); \
|
||||
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate<type> brd(src.rows, src.cols); \
|
||||
BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
};
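
        // Texture-path specializations: CUDA texture references must be declared at
        // file scope with a concrete element type, so the macro above stamps out one
        // texture, one reader struct, and one RemapDispatcherNonStream specialization
        // per pixel type. The reader adds (xoff, yoff) so a ROI inside srcWhole can be
        // addressed after the whole image is bound. The extra BrdReplicate
        // specialization skips the BorderReader entirely when the ROI covers the whole
        // image, since the texture's cudaAddressModeClamp already replicates edges.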

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_REMAP_TEX

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
            {
                if (stream == 0)
                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
                else
                    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
            }
        };
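
        // Synchronous (stream == 0) calls may take the texture path and always finish
        // with cudaDeviceSynchronize(); asynchronous calls take only the
        // global-memory path, which is safe to leave pending on the user's stream.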

        template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
            PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);

            static const caller_t callers[3][5] =
            {
                {
                    RemapDispatcher<PointFilter, BrdReflect101, T>::call,
                    RemapDispatcher<PointFilter, BrdReplicate, T>::call,
                    RemapDispatcher<PointFilter, BrdConstant, T>::call,
                    RemapDispatcher<PointFilter, BrdReflect, T>::call,
                    RemapDispatcher<PointFilter, BrdWrap, T>::call
                },
                {
                    RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
                    RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
                    RemapDispatcher<LinearFilter, BrdConstant, T>::call,
                    RemapDispatcher<LinearFilter, BrdReflect, T>::call,
                    RemapDispatcher<LinearFilter, BrdWrap, T>::call
                },
                {
                    RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
                    RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
                    RemapDispatcher<CubicFilter, BrdConstant, T>::call,
                    RemapDispatcher<CubicFilter, BrdReflect, T>::call,
                    RemapDispatcher<CubicFilter, BrdWrap, T>::call
                }
            };

            callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
                static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
        }
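
        // A minimal, hypothetical host-side sketch of how the caller table is
        // indexed (the real indices are produced by the C++ wrapper, which maps
        // OpenCV's interpolation and border constants onto these tables):
        //
        //     // row: 0 nearest / 1 linear / 2 cubic
        //     // column: 0 reflect101 / 1 replicate / 2 constant / 3 reflect / 4 wrap
        //     remap_gpu<uchar3>(src, srcWhole, 0, 0, xmap, ymap, dst,
        //                       1 /* linear */, 1 /* replicate */,
        //                       borderVal, stream, true /* presumably cc >= 2.0 */);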

        template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace cudev


#endif /* CUDA_DISABLER */
@@ -1,302 +0,0 @@
#if !defined CUDA_DISABLER

#include <cfloat>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"
#include "opencv2/core/cuda/scan.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float xcoo = x * fx;
                const float ycoo = y * fy;

                dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
            }
        }

        template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                dst(y, x) = saturate_cast<T>(src(y, x));
            }
        }
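
        // resize_area is deliberately a plain copy kernel: all of the box averaging
        // happens inside the AreaFilter / IntegerAreaFilter functor passed in as src,
        // which reduces the corresponding source window when dereferenced at (y, x).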

        template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
        {
            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                BrdReplicate<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);

                resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
        {
            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                BrdConstant<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
                AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );
                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
        {
            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
                BrdConstant<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
                IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );
                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                BrdReplicate<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

                resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
            texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
            struct tex_resize_ ## type ## _reader \
            { \
                typedef type elem_type; \
                typedef int index_type; \
                const int xoff; \
                const int yoff; \
                __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
                { \
                    return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
                } \
            }; \
            template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
                { \
                    dim3 block(32, 8); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_resize_ ## type, srcWhole); \
                    tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
                    if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                    { \
                        Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
                        resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
                    } \
                    else \
                    { \
                        BrdReplicate< type > brd(src.rows, src.cols); \
                        BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
                        Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
                        resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
                    } \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            };

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX

        template <template <typename> class Filter, typename T> struct ResizeDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                if (stream == 0)
                    ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
                else
                    ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
            }
        };

        template <typename T> struct ResizeDispatcher<AreaFilter, T>
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;
                int iscale_x = (int)round(fx);
                int iscale_y = (int)round(fy);

                if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
                    ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
                else
                    ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
            }
        };
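
        // Since FLT_MIN is the smallest positive normalized float, the test above
        // effectively requires fx and fy to be exact integers; only then is the
        // cheaper IntegerAreaFilter (fixed whole-pixel box) selected instead of the
        // general AreaFilter, which handles fractional scale factors.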

        template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
            PtrStepSzb dst, int interpolation, cudaStream_t stream)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);

            static const caller_t callers[4] =
            {
                ResizeDispatcher<PointFilter, T>::call,
                ResizeDispatcher<LinearFilter, T>::call,
                ResizeDispatcher<CubicFilter, T>::call,
                ResizeDispatcher<AreaFilter, T>::call
            };
            // change to linear if area interpolation is used for upscaling
            if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
                interpolation = 1;

            callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
                static_cast< PtrStepSz<T> >(dst), stream);
        }
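
        // Note the scale convention: in the kernels above a destination pixel (x, y)
        // samples the source at (x * fx, y * fy), so fx and fy are src/dst ratios
        // (fx = 2 halves the width). A minimal, hypothetical call for a 2x downscale
        // with bilinear filtering:
        //
        //     resize_gpu<uchar3>(src, srcWhole, 0, 0, 2.f, 2.f, dst,
        //                        1 /* linear */, stream);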

        template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        //template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        //template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template<typename T> struct scan_traits{};

        template<> struct scan_traits<uchar>
        {
            typedef float scan_line_type;
        };

    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace cudev


#endif /* CUDA_DISABLER */
@@ -1,389 +0,0 @@
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        __constant__ float c_warpMat[3 * 3];

        struct AffineTransform
        {
            static __device__ __forceinline__ float2 calcCoord(int x, int y)
            {
                const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
                const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];

                return make_float2(xcoo, ycoo);
            }
        };

        struct PerspectiveTransform
        {
            static __device__ __forceinline__ float2 calcCoord(int x, int y)
            {
                const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);

                const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
                const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);

                return make_float2(xcoo, ycoo);
            }
        };
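
        // Both functors implement the *inverse* mapping: for each destination pixel
        // (x, y) they return the source coordinates to sample. With the row-major
        // 3x3 matrix M stored in c_warpMat this is
        //
        //     affine:      (x', y') = (M00*x + M01*y + M02, M10*x + M11*y + M12)
        //     perspective: w = M20*x + M21*y + M22
        //                  (x', y') = ((M00*x + M01*y + M02) / w,
        //                              (M10*x + M11*y + M12) / w)
        //
        // so the host wrapper is expected to upload the matrix that maps destination
        // to source coordinates.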

        ///////////////////////////////////////////////////////////////////
        // Build Maps

        template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < xmap.cols && y < xmap.rows)
            {
                const float2 coord = Transform::calcCoord(x, y);

                xmap(y, x) = coord.x;
                ymap(y, x) = coord.y;
            }
        }

        template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));

            buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );

            buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
        }

        void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );

            buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
        }
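
        // Both builders funnel through the single __constant__ c_warpMat symbol; an
        // affine upload writes only the first six floats and leaves the projective
        // row untouched, which is harmless because AffineTransform never reads it.
        // Note that a single shared symbol also means warps using different matrices
        // must not be left running concurrently on separate streams.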

        ///////////////////////////////////////////////////////////////////
        // Warp

        template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float2 coord = Transform::calcCoord(x, y);

                dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
            }
        }

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, bool)
            {
                (void)xoff;
                (void)yoff;
                (void)srcWhole;

                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                warp<Transform><<<grid, block>>>(filter_src, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        #define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
            texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
            struct tex_warp_ ## type ## _reader \
            { \
                typedef type elem_type; \
                typedef int index_type; \
                int xoff, yoff; \
                tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
                { \
                    return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
                } \
            }; \
            template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, bool cc20) \
                { \
                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
                    dim3 block(32, cc20 ? 8 : 4); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_warp_ ## type , srcWhole); \
                    tex_warp_ ## type ## _reader texSrc(xoff, yoff); \
                    B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
                    BorderReader< tex_warp_ ## type ## _reader, B<work_type> > brdSrc(texSrc, brd); \
                    Filter< BorderReader< tex_warp_ ## type ## _reader, B<work_type> > > filter_src(brdSrc); \
                    warp<Transform><<<grid, block>>>(filter_src, dst); \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            }; \
            template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, bool) \
                { \
                    dim3 block(32, 8); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_warp_ ## type , srcWhole); \
                    tex_warp_ ## type ## _reader texSrc(xoff, yoff); \
                    if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                    { \
                        Filter< tex_warp_ ## type ## _reader > filter_src(texSrc); \
                        warp<Transform><<<grid, block>>>(filter_src, dst); \
                    } \
                    else \
                    { \
                        BrdReplicate<type> brd(src.rows, src.cols); \
                        BorderReader< tex_warp_ ## type ## _reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
                        Filter< BorderReader< tex_warp_ ## type ## _reader, BrdReplicate<type> > > filter_src(brdSrc); \
                        warp<Transform><<<grid, block>>>(filter_src, dst); \
                    } \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            };

        OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)

        OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)

        OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_WARP_TEX

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
            {
                if (stream == 0)
                    WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc20);
                else
                    WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc20);
            }
        };

        template <class Transform, typename T>
        void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
        {
            typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);

            static const func_t funcs[3][5] =
            {
                {
                    WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
                },
                {
                    WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
                },
                {
                    WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
                }
            };

            funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
                static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
        }

        template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );

            warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
        }

        template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );

            warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
        }

        template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace cudev


#endif /* CUDA_DISABLER */
@@ -1,274 +0,0 @@
#ifndef __cvt_color_internal_h__
#define __cvt_color_internal_h__

namespace cv { namespace gpu { namespace cudev
{
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
    void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _16u) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f)

#define OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(name) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f)

#define OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(name) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _full_8u) \
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _full_32f)
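
// For example, OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgb) below expands to
// three declarations, one per supported depth:
//
//     void bgr_to_rgb_8u (PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//     void bgr_to_rgb_16u(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//     void bgr_to_rgb_32f(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//
// The _FULL variant additionally declares the *_full_* versions used for the
// full-range HSV/HLS conversions.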
|
||||
|
||||
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_rgba)

    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr_to_bgr555)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr_to_bgr565)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgb_to_bgr555)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgb_to_bgr565)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgra_to_bgr555)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgra_to_bgr565)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgba_to_bgr555)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgba_to_bgr565)

    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(gray_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(gray_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(gray_to_bgr555)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(gray_to_bgr565)

    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_gray)
    OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_gray)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_gray)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_gray)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_gray)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_gray)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_yuv)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_yuv)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_yuv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_yuv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_yuv)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_yuv)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_yuv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_yuv4)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_YCrCb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_YCrCb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_YCrCb4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_YCrCb4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_YCrCb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_YCrCb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_YCrCb4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_YCrCb4)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_xyz)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_xyz)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_xyz4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_xyz4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_xyz)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_xyz)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_xyz4)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_xyz4)

    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hsv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hsv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hsv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hsv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hsv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hsv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hsv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hsv4)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hls)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hls)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hls4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hls4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hls)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hls)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hls4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hls4)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_lab4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_lab4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_lab4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_lab4)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_lab4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_lab4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_lab)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_lab4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_lab4)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lrgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lrgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lrgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lrgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lbgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lbgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lbgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lbgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_luv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_luv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_luv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_luv4)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_luv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_luv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_luv)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_luv4)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_luv4)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_rgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_rgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_bgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_bgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_bgra)

    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lrgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lrgb)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lrgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lrgba)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lbgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lbgr)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lbgra)
    OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lbgra)

    #undef OPENCV_GPU_DECLARE_CVTCOLOR_ONE
    #undef OPENCV_GPU_DECLARE_CVTCOLOR_ALL
    #undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F
    #undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL
}}}

#endif
@@ -1,198 +0,0 @@
/* OpenCV BSD license header elided (identical to the one above) */

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::gpu::bilateralFilter(const GpuMat&, GpuMat&, int, float, float, int, Stream&) { throw_no_cuda(); }
void cv::gpu::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }

void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
void cv::gpu::FastNonLocalMeansDenoising::labMethod(const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }

#else

//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denoising (brute force)

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template<typename T>
        void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t stream);

        template<typename T>
        void nlm_bruteforce_gpu(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
    }
}}}

void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& s)
{
    using cv::gpu::cudev::imgproc::bilateral_filter_gpu;

    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);

    static const func_t funcs[6][4] =
    {
        {bilateral_filter_gpu<uchar>      , 0 /*bilateral_filter_gpu<uchar2>*/ , bilateral_filter_gpu<uchar3>      , bilateral_filter_gpu<uchar4>      },
        {0 /*bilateral_filter_gpu<schar>*/, 0 /*bilateral_filter_gpu<schar2>*/ , 0 /*bilateral_filter_gpu<schar3>*/, 0 /*bilateral_filter_gpu<schar4>*/},
        {bilateral_filter_gpu<ushort>     , 0 /*bilateral_filter_gpu<ushort2>*/, bilateral_filter_gpu<ushort3>     , bilateral_filter_gpu<ushort4>     },
        {bilateral_filter_gpu<short>      , 0 /*bilateral_filter_gpu<short2>*/ , bilateral_filter_gpu<short3>      , bilateral_filter_gpu<short4>      },
        {0 /*bilateral_filter_gpu<int>*/  , 0 /*bilateral_filter_gpu<int2>*/   , 0 /*bilateral_filter_gpu<int3>*/  , 0 /*bilateral_filter_gpu<int4>*/  },
        {bilateral_filter_gpu<float>      , 0 /*bilateral_filter_gpu<float2>*/ , bilateral_filter_gpu<float3>      , bilateral_filter_gpu<float4>      }
    };

    sigma_color = (sigma_color <= 0) ? 1 : sigma_color;
    sigma_spatial = (sigma_spatial <= 0) ? 1 : sigma_spatial;

    int radius = (kernel_size <= 0) ? cvRound(sigma_spatial * 1.5) : kernel_size / 2;
    kernel_size = std::max(radius, 1) * 2 + 1;

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert(func != 0);

    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);

    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

    dst.create(src.size(), src.type());
    func(src, dst, kernel_size, sigma_spatial, sigma_color, gpuBorderType, StreamAccessor::getStream(s));
}
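A minimal usage sketch for this entry point (file names are hypothetical; assumes the 2.4-era <opencv2/gpu/gpu.hpp> headers and a CUDA-capable device):

    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat src = cv::imread("input.png");   // hypothetical 8UC3 input
        cv::gpu::GpuMat d_src(src), d_dst;       // this constructor uploads to the device

        // kernel_size = 9, sigma_color = 50, sigma_spatial = 7
        cv::gpu::bilateralFilter(d_src, d_dst, 9, 50.f, 7.f, cv::BORDER_DEFAULT, cv::gpu::Stream::Null());

        cv::Mat dst(d_dst);                      // downloads the result
        cv::imwrite("output.png", dst);
        return 0;
    }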

void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
{
    using cv::gpu::cudev::imgproc::nlm_bruteforce_gpu;
    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);

    static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0 /*nlm_bruteforce_gpu<uchar4>*/ };

    CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);

    const func_t func = funcs[src.channels() - 1];
    CV_Assert(func != 0);

    int b = borderMode;
    CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);

    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

    dst.create(src.size(), src.type());
    func(src, dst, search_window / 2, block_window / 2, h, gpuBorderType, StreamAccessor::getStream(s));
}
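And a sketch of the brute-force NLM call (values are illustrative; h is the filter strength, and the odd search/block window sizes are halved into the radii the kernel receives):

    cv::gpu::GpuMat d_dst;
    // d_src must be CV_8UC1, CV_8UC2 or CV_8UC3
    cv::gpu::nonLocalMeans(d_src, d_dst, 10.0f, 21, 7, cv::BORDER_DEFAULT, cv::gpu::Stream::Null());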


//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denoising (fast approximate)


namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows);

        template<typename T>
        void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer,
                          int search_window, int block_window, float h, cudaStream_t stream);

        void fnlm_split_channels(const PtrStepSz<uchar3>& lab, PtrStepb l, PtrStep<uchar2> ab, cudaStream_t stream);
        void fnlm_merge_channels(const PtrStepb& l, const PtrStep<uchar2>& ab, PtrStepSz<uchar3> lab, cudaStream_t stream);
    }
}}}

void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
{
    CV_Assert(src.depth() == CV_8U && src.channels() < 4);

    int border_size = search_window/2 + block_window/2;
    Size esize = src.size() + Size(border_size, border_size) * 2;

    cv::gpu::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
    GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);

    cv::gpu::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
    GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));

    int bcols, brows;
    cudev::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
    buffer.create(brows, bcols, CV_32S);

    using namespace cv::gpu::cudev::imgproc;
    typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0 };

    dst.create(src.size(), src.type());
    funcs[src.channels() - 1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
}
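To make the padding arithmetic concrete: with the usual 2.4-era defaults of search_window = 21 and block_window = 7 (an assumption here, since the header is not part of this diff), border_size = 21/2 + 7/2 = 13, so a 640x480 input is border-extended out to 666x506 before the fast kernel runs, and src_hdr is the 640x480 view back into the middle of that padded buffer.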

void cv::gpu::FastNonLocalMeansDenoising::labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
{
    CV_Assert(src.type() == CV_8UC3);

    lab.create(src.size(), src.type());
    cv::gpu::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);

    l.create(src.size(), CV_8U);
    ab.create(src.size(), CV_8UC2);
    cudev::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));

    simpleMethod(l, l, h_luminance, search_window, block_window, s);
    simpleMethod(ab, ab, h_color, search_window, block_window, s);

    cudev::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s));
    cv::gpu::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
}
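A usage sketch for the class as a whole. The object owns the scratch buffers used above (lab, l, ab, buffer, extended_src_buffer), so reusing one instance across frames avoids reallocation; the strengths and window sizes below are illustrative:

    cv::gpu::FastNonLocalMeansDenoising fnlm;
    cv::gpu::GpuMat d_frame(frame), d_denoised;   // frame: CV_8UC3 cv::Mat

    // separate strengths for luminance (L) and chroma (a, b)
    fnlm.labMethod(d_frame, d_denoised, 20.0f, 10.0f, 21, 7, cv::gpu::Stream::Null());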


#endif

@@ -1,169 +0,0 @@
/* OpenCV BSD license header elided (identical to the one above) */

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat&, GpuMat&, const GpuMat&) { throw_no_cuda(); }

#else /* !defined (HAVE_CUDA) */

namespace cv { namespace gpu { namespace cudev
{
    namespace gfft
    {
        int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count);
        void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count);
    }
}}}

void cv::gpu::GoodFeaturesToTrackDetector_GPU::operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask)
{
    using namespace cv::gpu::cudev::gfft;

    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));

    ensureSizeIsEnough(image.size(), CV_32F, eig_);

    if (useHarrisDetector)
        cornerHarris(image, eig_, Dx_, Dy_, buf_, blockSize, 3, harrisK);
    else
        cornerMinEigenVal(image, eig_, Dx_, Dy_, buf_, blockSize, 3);

    double maxVal = 0;
    minMax(eig_, 0, &maxVal, GpuMat(), minMaxbuf_);

    ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);

    int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols);

    if (total == 0)
    {
        corners.release();
        return;
    }

    sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total);

    if (minDistance < 1)
        tmpCorners_.colRange(0, maxCorners > 0 ? std::min(maxCorners, total) : total).copyTo(corners);
    else
    {
        std::vector<Point2f> tmp(total);
        Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]);
        tmpCorners_.colRange(0, total).download(tmpMat);

        std::vector<Point2f> tmp2;
        tmp2.reserve(total);

        const int cell_size = cvRound(minDistance);
        const int grid_width = (image.cols + cell_size - 1) / cell_size;
        const int grid_height = (image.rows + cell_size - 1) / cell_size;

        std::vector< std::vector<Point2f> > grid(grid_width * grid_height);

        for (int i = 0; i < total; ++i)
        {
            Point2f p = tmp[i];

            bool good = true;

            int x_cell = static_cast<int>(p.x / cell_size);
            int y_cell = static_cast<int>(p.y / cell_size);

            int x1 = x_cell - 1;
            int y1 = y_cell - 1;
            int x2 = x_cell + 1;
            int y2 = y_cell + 1;

            // boundary check
            x1 = std::max(0, x1);
            y1 = std::max(0, y1);
            x2 = std::min(grid_width - 1, x2);
            y2 = std::min(grid_height - 1, y2);

            for (int yy = y1; yy <= y2; yy++)
            {
                for (int xx = x1; xx <= x2; xx++)
                {
                    std::vector<Point2f>& m = grid[yy * grid_width + xx];

                    if (!m.empty())
                    {
                        for (size_t j = 0; j < m.size(); j++)
                        {
                            float dx = p.x - m[j].x;
                            float dy = p.y - m[j].y;

                            if (dx * dx + dy * dy < minDistance * minDistance)
                            {
                                good = false;
                                goto break_out;
                            }
                        }
                    }
                }
            }

        break_out:

            if (good)
            {
                grid[y_cell * grid_width + x_cell].push_back(p);

                tmp2.push_back(p);

                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
                    break;
            }
        }

        corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
    }
}

#endif /* !defined (HAVE_CUDA) */
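A usage sketch for the detector (the constructor-argument order maxCorners / qualityLevel / minDistance is an assumption about the class header, which is not part of this diff):

    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(500, 0.01, 10.0);

    cv::gpu::GpuMat d_gray(gray);                     // gray: CV_8UC1 cv::Mat
    cv::gpu::GpuMat d_corners;
    detector(d_gray, d_corners, cv::gpu::GpuMat());   // d_corners: 1 x N, CV_32FC2

Note that the minDistance pass above runs on the host: candidates are downloaded, binned into a grid of minDistance-sized cells, and each point is tested only against the 3x3 cell neighbourhood rather than every already-kept corner, which keeps the rejection step close to linear.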
@@ -1,282 +0,0 @@
/* OpenCV BSD license header elided (identical to the one above) */

#include "precomp.hpp"

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }

void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_no_cuda(); }
void cv::gpu::labelComponents(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }

#else /* !defined (HAVE_CUDA) */

namespace cv { namespace gpu { namespace cudev
{
    namespace ccl
    {
        void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream);

        template<typename T>
        void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
    }
}}}

static float4 scalarToCudaType(const cv::Scalar& in)
{
    return make_float4((float)in[0], (float)in[1], (float)in[2], (float)in[3]);
}

void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
{
    CV_Assert(!image.empty());

    int ch = image.channels();
    CV_Assert(ch <= 4);

    int depth = image.depth();

    typedef void (*func_t)(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);

    // Rows are indexed by depth (CV_8U = 0, CV_8S = 1, CV_16U = 2, ...),
    // columns by channel count 1..4.
    static const func_t supportLookup[8][4] =
    {
        { cudev::ccl::computeEdges<uchar>,  0, cudev::ccl::computeEdges<uchar3>,  cudev::ccl::computeEdges<uchar4>  }, // CV_8U
        { 0,                                0, 0,                                 0                                 }, // CV_8S
        { cudev::ccl::computeEdges<ushort>, 0, cudev::ccl::computeEdges<ushort3>, cudev::ccl::computeEdges<ushort4> }, // CV_16U
        { 0,                                0, 0,                                 0                                 }, // CV_16S
        { cudev::ccl::computeEdges<int>,    0, 0,                                 0                                 }, // CV_32S
        { cudev::ccl::computeEdges<float>,  0, 0,                                 0                                 }, // CV_32F
        { 0,                                0, 0,                                 0                                 }, // CV_64F
        { 0,                                0, 0,                                 0                                 }  // CV_USRTYPE1
    };

    func_t f = supportLookup[depth][ch - 1];
    CV_Assert(f);

    if (image.size() != mask.size() || mask.type() != CV_8UC1)
        mask.create(image.size(), CV_8UC1);

    cudaStream_t stream = StreamAccessor::getStream(s);
    float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi);
    f(image, mask, culo, cuhi, stream);
}

void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
{
    CV_Assert(!mask.empty() && mask.type() == CV_8U);

    if (!deviceSupports(SHARED_ATOMICS))
        CV_Error(cv::Error::StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");

    components.create(mask.size(), CV_32SC1);

    cudaStream_t stream = StreamAccessor::getStream(s);
    cudev::ccl::labelComponents(mask, components, flags, stream);
}
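The two functions are meant to be chained: connectivityMask turns per-pixel similarity into an edge mask (neighbours whose channel-wise difference falls inside the lo/hi bounds are treated as connected; the exact semantics live in the computeEdges kernel), and labelComponents floods that mask into integer labels. A sketch with illustrative bounds:

    cv::gpu::GpuMat d_img(img);    // e.g. CV_8UC1 input
    cv::gpu::GpuMat d_mask, d_labels;

    cv::gpu::connectivityMask(d_img, d_mask, cv::Scalar::all(0), cv::Scalar::all(2), cv::gpu::Stream::Null());
    cv::gpu::labelComponents(d_mask, d_labels, 0 /*flags*/, cv::gpu::Stream::Null());   // CV_32SC1 labels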

namespace
{
    typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem);

    class NppiGraphcutStateHandler
    {
    public:
        NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func)
        {
            nppSafeCall( func(sznpp, &pState, pDeviceMem) );
        }

        ~NppiGraphcutStateHandler()
        {
            nppSafeCall( nppiGraphcutFree(pState) );
        }

        operator NppiGraphcutState*()
        {
            return pState;
        }

    private:
        NppiGraphcutState* pState;
    };
}

void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
{
#if (CUDA_VERSION < 5000)
    CV_Assert(terminals.type() == CV_32S);
#else
    CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
#endif

    Size src_size = terminals.size();

    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == terminals.type());

    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == terminals.type());

    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == terminals.type());

    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == terminals.type());

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    int bufsz;
    nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );

    ensureSizeIsEnough(1, bufsz, CV_8U, buf);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc);

#if (CUDA_VERSION < 5000)
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
#else
    if (terminals.type() == CV_32S)
    {
        nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
    else
    {
        nppSafeCall( nppiGraphcut_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(), top.ptr<Npp32f>(), bottom.ptr<Npp32f>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
#endif

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
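One detail worth spelling out, because the asserts encode it: terminals, top and bottom are laid out like the image, while leftTransp and rightTransp hold the horizontal edge capacities transposed (Size(height, width)), which is the layout the NPP graph-cut primitives consume. A minimal allocation sketch, assuming CV_32S capacities and a 640x480 problem:

    cv::gpu::GpuMat terminals(480, 640, CV_32S);                      // s/t capacities, image layout
    cv::gpu::GpuMat top(480, 640, CV_32S), bottom(480, 640, CV_32S);  // vertical edges
    cv::gpu::GpuMat leftT(640, 480, CV_32S), rightT(640, 480, CV_32S); // horizontal edges, transposed
    // ... fill the capacity matrices from the data/smoothness terms ...
    cv::gpu::GpuMat labels, buf;
    cv::gpu::graphcut(terminals, leftT, rightT, top, bottom, labels, buf, cv::gpu::Stream::Null());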

void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
                       GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
{
#if (CUDA_VERSION < 5000)
    CV_Assert(terminals.type() == CV_32S);
#else
    CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
#endif

    Size src_size = terminals.size();

    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == terminals.type());

    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == terminals.type());

    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == terminals.type());

    CV_Assert(topLeft.size() == src_size);
    CV_Assert(topLeft.type() == terminals.type());

    CV_Assert(topRight.size() == src_size);
    CV_Assert(topRight.type() == terminals.type());

    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == terminals.type());

    CV_Assert(bottomLeft.size() == src_size);
    CV_Assert(bottomLeft.type() == terminals.type());

    CV_Assert(bottomRight.size() == src_size);
    CV_Assert(bottomRight.type() == terminals.type());

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    int bufsz;
    nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) );

    ensureSizeIsEnough(1, bufsz, CV_8U, buf);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAlloc);

#if (CUDA_VERSION < 5000)
    nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
        top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
        bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
#else
    if (terminals.type() == CV_32S)
    {
        nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
            top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
            bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
    else
    {
        nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(),
            top.ptr<Npp32f>(), topLeft.ptr<Npp32f>(), topRight.ptr<Npp32f>(),
            bottom.ptr<Npp32f>(), bottomLeft.ptr<Npp32f>(), bottomRight.ptr<Npp32f>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
#endif

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}

#endif /* !defined (HAVE_CUDA) */
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,439 +0,0 @@
/* OpenCV BSD license header elided (identical to the one above) */

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }

#else

namespace cv { namespace gpu { namespace cudev
{
    namespace match_template
    {
        void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
        void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);

        void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
        void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);

        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
            int cn, cudaStream_t stream);

        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
            int cn, cudaStream_t stream);

        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_8UC2(
            int w, int h,
            const PtrStepSz<unsigned int> image_sum_r,
            const PtrStepSz<unsigned int> image_sum_g,
            unsigned int templ_sum_r,
            unsigned int templ_sum_g,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_8UC3(
            int w, int h,
            const PtrStepSz<unsigned int> image_sum_r,
            const PtrStepSz<unsigned int> image_sum_g,
            const PtrStepSz<unsigned int> image_sum_b,
            unsigned int templ_sum_r,
            unsigned int templ_sum_g,
            unsigned int templ_sum_b,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_8UC4(
            int w, int h,
            const PtrStepSz<unsigned int> image_sum_r,
            const PtrStepSz<unsigned int> image_sum_g,
            const PtrStepSz<unsigned int> image_sum_b,
            const PtrStepSz<unsigned int> image_sum_a,
            unsigned int templ_sum_r,
            unsigned int templ_sum_g,
            unsigned int templ_sum_b,
            unsigned int templ_sum_a,
            PtrStepSzf result, cudaStream_t stream);


        void matchTemplatePrepared_CCOFF_NORMED_8U(
            int w, int h, const PtrStepSz<unsigned int> image_sum,
            const PtrStepSz<unsigned long long> image_sqsum,
            unsigned int templ_sum, unsigned long long templ_sqsum,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_NORMED_8UC2(
            int w, int h,
            const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
            const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
            unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
            unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_NORMED_8UC3(
            int w, int h,
            const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
            const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
            const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
            unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
            unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
            unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_NORMED_8UC4(
            int w, int h,
            const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
            const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
            const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
            const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
            unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
            unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
            unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
            unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
            PtrStepSzf result, cudaStream_t stream);

        void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
            unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);

        void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
    }
}}}

using namespace ::cv::gpu::cudev::match_template;

namespace
{

    // Evaluates the optimal template-area threshold: if the template's area is
    // below it, the naive match-template kernel is used; otherwise the
    // FFT-based path (if available).
    int getTemplateThreshold(int method, int depth)
    {
        switch (method)
        {
        case cv::TM_CCORR:
            if (depth == CV_32F) return 250;
            if (depth == CV_8U) return 300;
            break;
        case cv::TM_SQDIFF:
            if (depth == CV_8U) return 300;
            break;
        }
        CV_Error(cv::Error::StsBadArg, "getTemplateThreshold: unsupported match template mode");
        return 0;
    }


    void matchTemplate_CCORR_32F(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
        if (templ.size().area() < getTemplateThreshold(cv::TM_CCORR, CV_32F))
        {
            matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
            return;
        }

        ConvolveBuf convolve_buf;
        convolve_buf.user_block_size = buf.user_block_size;

        if (image.channels() == 1)
            convolve(image.reshape(1), templ.reshape(1), result, true, convolve_buf, stream);
        else
        {
            GpuMat result_;
            convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf, stream);
            extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
        }
    }
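Both branches compute the same map, one value per valid template placement, which is why the result is (rows - templ.rows + 1) x (cols - templ.cols + 1):

    R_ccorr(x, y) = sum over (u, v) of T(u, v) * I(x + u, y + v)

The naive kernel evaluates the sum directly; above the area threshold the convolve(..., ccorr = true, ...) path computes it in the frequency domain, where the cost no longer grows with the template area.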


    void matchTemplate_CCORR_8U(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        if (templ.size().area() < getTemplateThreshold(cv::TM_CCORR, CV_8U))
        {
            result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
            matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
            return;
        }

        if (stream)
        {
            stream.enqueueConvert(image, buf.imagef, CV_32F);
            stream.enqueueConvert(templ, buf.templf, CV_32F);
        }
        else
        {
            image.convertTo(buf.imagef, CV_32F);
            templ.convertTo(buf.templf, CV_32F);
        }
        matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);
    }


    void matchTemplate_CCORR_NORMED_8U(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        matchTemplate_CCORR_8U(image, templ, result, buf, stream);

        buf.image_sqsums.resize(1);
        sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);

        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
        normalize_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    }


    void matchTemplate_SQDIFF_32F(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        (void)buf;
        result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
        matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
    }


    void matchTemplate_SQDIFF_8U(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        if (templ.size().area() < getTemplateThreshold(cv::TM_SQDIFF, CV_8U))
        {
            result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
            matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
            return;
        }

        buf.image_sqsums.resize(1);
        sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);

        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];

        matchTemplate_CCORR_8U(image, templ, result, buf, stream);
        matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    }
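The prepared path works because the squared difference expands into three terms, two of which are cheap lookups:

    R_sqdiff(x, y) = sum (I(x+u, y+v) - T(u, v))^2
                   = sum I(x+u, y+v)^2  -  2 * R_ccorr(x, y)  +  sum T(u, v)^2

The windowed image term comes from the squared integral image (sqrIntegral), the template term is the scalar templ_sqsum, and R_ccorr is the map just produced by matchTemplate_CCORR_8U, so matchTemplatePrepared_SQDIFF_8U only has to combine them per pixel.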


    void matchTemplate_SQDIFF_NORMED_8U(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        buf.image_sqsums.resize(1);
        sqrIntegral(image.reshape(1), buf.image_sqsums[0], stream);

        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];

        matchTemplate_CCORR_8U(image, templ, result, buf, stream);
        matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    }


    void matchTemplate_CCOFF_8U(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        matchTemplate_CCORR_8U(image, templ, result, buf, stream);

        if (image.channels() == 1)
        {
            buf.image_sums.resize(1);
            integral(image, buf.image_sums[0], stream);

            unsigned int templ_sum = (unsigned int)sum(templ)[0];
            matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, buf.image_sums[0], templ_sum, result, StreamAccessor::getStream(stream));
        }
        else
        {
            split(image, buf.images);
            buf.image_sums.resize(buf.images.size());
            for (int i = 0; i < image.channels(); ++i)
                integral(buf.images[i], buf.image_sums[i], stream);

            Scalar templ_sum = sum(templ);

            switch (image.channels())
            {
            case 2:
                matchTemplatePrepared_CCOFF_8UC2(
                        templ.cols, templ.rows, buf.image_sums[0], buf.image_sums[1],
                        (unsigned int)templ_sum[0], (unsigned int)templ_sum[1],
                        result, StreamAccessor::getStream(stream));
                break;
            case 3:
                matchTemplatePrepared_CCOFF_8UC3(
                        templ.cols, templ.rows, buf.image_sums[0], buf.image_sums[1], buf.image_sums[2],
                        (unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
                        result, StreamAccessor::getStream(stream));
                break;
            case 4:
                matchTemplatePrepared_CCOFF_8UC4(
                        templ.cols, templ.rows, buf.image_sums[0], buf.image_sums[1], buf.image_sums[2], buf.image_sums[3],
                        (unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
                        (unsigned int)templ_sum[3], result, StreamAccessor::getStream(stream));
                break;
            default:
                CV_Error(cv::Error::StsBadArg, "matchTemplate: unsupported number of channels");
            }
        }
    }


    void matchTemplate_CCOFF_NORMED_8U(
            const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf& buf, Stream& stream)
    {
        if (stream)
        {
            stream.enqueueConvert(image, buf.imagef, CV_32F);
            stream.enqueueConvert(templ, buf.templf, CV_32F);
        }
        else
        {
            image.convertTo(buf.imagef, CV_32F);
            templ.convertTo(buf.templf, CV_32F);
        }

        matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);

        if (image.channels() == 1)
        {
            buf.image_sums.resize(1);
            integral(image, buf.image_sums[0], stream);
            buf.image_sqsums.resize(1);
            sqrIntegral(image, buf.image_sqsums[0], stream);

            unsigned int templ_sum = (unsigned int)sum(templ)[0];
            unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ)[0];

            matchTemplatePrepared_CCOFF_NORMED_8U(
                    templ.cols, templ.rows, buf.image_sums[0], buf.image_sqsums[0],
                    templ_sum, templ_sqsum, result, StreamAccessor::getStream(stream));
        }
        else
        {
            split(image, buf.images);
            buf.image_sums.resize(buf.images.size());
            buf.image_sqsums.resize(buf.images.size());
            for (int i = 0; i < image.channels(); ++i)
            {
                integral(buf.images[i], buf.image_sums[i], stream);
                sqrIntegral(buf.images[i], buf.image_sqsums[i], stream);
            }

            Scalar templ_sum = sum(templ);
            Scalar templ_sqsum = sqrSum(templ);

            switch (image.channels())
            {
            case 2:
                matchTemplatePrepared_CCOFF_NORMED_8UC2(
                        templ.cols, templ.rows,
                        buf.image_sums[0], buf.image_sqsums[0],
                        buf.image_sums[1], buf.image_sqsums[1],
                        (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
                        (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
                        result, StreamAccessor::getStream(stream));
                break;
            case 3:
                matchTemplatePrepared_CCOFF_NORMED_8UC3(
                        templ.cols, templ.rows,
                        buf.image_sums[0], buf.image_sqsums[0],
                        buf.image_sums[1], buf.image_sqsums[1],
                        buf.image_sums[2], buf.image_sqsums[2],
                        (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
                        (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
                        (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
                        result, StreamAccessor::getStream(stream));
                break;
            case 4:
                matchTemplatePrepared_CCOFF_NORMED_8UC4(
                        templ.cols, templ.rows,
                        buf.image_sums[0], buf.image_sqsums[0],
                        buf.image_sums[1], buf.image_sqsums[1],
                        buf.image_sums[2], buf.image_sqsums[2],
                        buf.image_sums[3], buf.image_sqsums[3],
                        (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
                        (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
                        (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
                        (unsigned int)templ_sum[3], (unsigned long long)templ_sqsum[3],
                        result, StreamAccessor::getStream(stream));
                break;
            default:
                CV_Error(cv::Error::StsBadArg, "matchTemplate: unsupported number of channels");
            }
        }
    }
}

void cv::gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream& stream)
{
    MatchTemplateBuf buf;
    matchTemplate(image, templ, result, method, buf, stream);
}


void cv::gpu::matchTemplate(
        const GpuMat& image, const GpuMat& templ, GpuMat& result, int method,
        MatchTemplateBuf& buf, Stream& stream)
{
    CV_Assert(image.type() == templ.type());
    CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);

    typedef void (*Caller)(const GpuMat&, const GpuMat&, GpuMat&, MatchTemplateBuf&, Stream& stream);

    static const Caller callers8U[] = { ::matchTemplate_SQDIFF_8U, ::matchTemplate_SQDIFF_NORMED_8U,
                                        ::matchTemplate_CCORR_8U, ::matchTemplate_CCORR_NORMED_8U,
                                        ::matchTemplate_CCOFF_8U, ::matchTemplate_CCOFF_NORMED_8U };
    static const Caller callers32F[] = { ::matchTemplate_SQDIFF_32F, 0,
                                         ::matchTemplate_CCORR_32F, 0, 0, 0 };

    const Caller* callers = 0;
    switch (image.depth())
    {
    case CV_8U: callers = callers8U; break;
    case CV_32F: callers = callers32F; break;
    default: CV_Error(cv::Error::StsBadArg, "matchTemplate: unsupported data type");
    }

    Caller caller = callers[method];
    CV_Assert(caller);
    caller(image, templ, result, buf, stream);
}

#endif
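An end-to-end usage sketch (TM_CCOEFF_NORMED on 8-bit inputs, then locating the best placement):

    cv::gpu::GpuMat d_img(img), d_templ(templ), d_result;   // img, templ: CV_8UC1
    cv::gpu::matchTemplate(d_img, d_templ, d_result, cv::TM_CCOEFF_NORMED, cv::gpu::Stream::Null());

    double minVal, maxVal;
    cv::Point minLoc, maxLoc;
    cv::gpu::minMaxLoc(d_result, &minVal, &maxVal, &minLoc, &maxLoc);
    // maxLoc is the top-left corner of the best match for the *_NORMED methods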
@@ -1,387 +0,0 @@
/* OpenCV BSD license header elided (identical to the one above) */
#include "precomp.hpp"
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::meanShiftSegmentation(const GpuMat&, Mat&, int, int, int, TermCriteria) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
// Auxiliray stuff
|
||||
namespace
{

    //
    // Declarations
    //

    class DjSets
    {
    public:
        DjSets(int n);
        int find(int elem);
        int merge(int set1, int set2);

        std::vector<int> parent;
        std::vector<int> rank;
        std::vector<int> size;
    private:
        DjSets(const DjSets&);
        void operator =(const DjSets&);
    };


    template <typename T>
    struct GraphEdge
    {
        GraphEdge() {}
        GraphEdge(int to_, int next_, const T& val_) : to(to_), next(next_), val(val_) {}
        int to;
        int next;
        T val;
    };


    template <typename T>
    class Graph
    {
    public:
        typedef GraphEdge<T> Edge;

        Graph(int numv, int nume_max);

        void addEdge(int from, int to, const T& val=T());

        std::vector<int> start;
        std::vector<Edge> edges;

        int numv;
        int nume_max;
        int nume;
    private:
        Graph(const Graph&);
        void operator =(const Graph&);
    };


    struct SegmLinkVal
    {
        SegmLinkVal() {}
        SegmLinkVal(int dr_, int dsp_) : dr(dr_), dsp(dsp_) {}
        bool operator <(const SegmLinkVal& other) const
        {
            return dr + dsp < other.dr + other.dsp;
        }
        int dr;
        int dsp;
    };


    struct SegmLink
    {
        SegmLink() {}
        SegmLink(int from_, int to_, const SegmLinkVal& val_)
            : from(from_), to(to_), val(val_) {}
        bool operator <(const SegmLink& other) const
        {
            return val < other.val;
        }
        int from;
        int to;
        SegmLinkVal val;
    };

    //
    // Implementation
    //

    // Disjoint set union (union-find) with path compression and union by rank;
    // size[] tracks the number of pixels in each component
    DjSets::DjSets(int n) : parent(n), rank(n, 0), size(n, 1)
    {
        for (int i = 0; i < n; ++i)
            parent[i] = i;
    }


    inline int DjSets::find(int elem)
    {
        // first walk to the root, then compress the whole path onto it
        int set = elem;
        while (set != parent[set])
            set = parent[set];
        while (elem != parent[elem])
        {
            int next = parent[elem];
            parent[elem] = set;
            elem = next;
        }
        return set;
    }


    inline int DjSets::merge(int set1, int set2)
    {
        // attach the lower-rank tree under the higher-rank root;
        // on a tie, the surviving root's rank grows by one
        if (rank[set1] < rank[set2])
        {
            parent[set1] = set2;
            size[set2] += size[set1];
            return set2;
        }
        if (rank[set2] < rank[set1])
        {
            parent[set2] = set1;
            size[set1] += size[set2];
            return set1;
        }
        parent[set1] = set2;
        rank[set2]++;
        size[set2] += size[set1];
        return set2;
    }
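    // Minimal illustration (added; not in the original file) of the DjSets
    // protocol used below: merge() takes current roots, find() answers
    // membership, and size[] is only meaningful at a root index.
    //
    //   DjSets s(4);                          // {0} {1} {2} {3}
    //   s.merge(s.find(0), s.find(1));        // {0,1} {2} {3}
    //   bool same = s.find(0) == s.find(1);   // true
    //   int  n    = s.size[s.find(1)];        // 2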

    template <typename T>
    Graph<T>::Graph(int numv_, int nume_max_) : start(numv_, -1), edges(nume_max_)
    {
        this->numv = numv_;
        this->nume_max = nume_max_;
        nume = 0;
    }


    // Edges are stored in per-vertex singly linked lists: start[from] holds the
    // index of the newest edge, which chains to older ones through Edge::next
    template <typename T>
    inline void Graph<T>::addEdge(int from, int to, const T& val)
    {
        edges[nume] = Edge(to, start[from], val);
        start[from] = nume;
        nume++;
    }


    inline int pix(int y, int x, int ncols)
    {
        return y * ncols + x;
    }


    inline int sqr(int x)
    {
        return x * x;
    }


    // squared color distance (the three color channels of an 8UC4 pixel)
    inline int dist2(const cv::Vec4b& lhs, const cv::Vec4b& rhs)
    {
        return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]) + sqr(lhs[2] - rhs[2]);
    }


    // squared spatial distance
    inline int dist2(const cv::Vec2s& lhs, const cv::Vec2s& rhs)
    {
        return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]);
    }

} // anonymous namespace
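// Sketch (added for illustration): walking the adjacency list that addEdge()
// builds. Each vertex v keeps its newest edge index in g.start[v]; edges chain
// through Edge::next and the list ends at -1. process() is a hypothetical callback.
//
//   for (int e = g.start[v]; e != -1; e = g.edges[e].next)
//       process(g.edges[e].to, g.edges[e].val);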

void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, TermCriteria criteria)
{
    CV_Assert(src.type() == CV_8UC4);
    const int nrows = src.rows;
    const int ncols = src.cols;
    const int hr = sr;
    const int hsp = sp;

    // Perform the mean shift procedure and obtain the region and spatial maps
    GpuMat d_rmap, d_spmap;
    meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
    Mat rmap(d_rmap);
    Mat spmap(d_spmap);

    Graph<SegmLinkVal> g(nrows * ncols, 4 * (nrows - 1) * (ncols - 1)
                                        + (nrows - 1) + (ncols - 1));

    // Build the region adjacency graph from the image: each interior pixel is
    // linked to its right, bottom and bottom-right neighbors, plus the
    // anti-diagonal link from (y, x + 1) to (y + 1, x)
    Vec4b r1;
    Vec4b r2[4];
    Vec2s sp1;
    Vec2s sp2[4];
    int dr[4];
    int dsp[4];
    for (int y = 0; y < nrows - 1; ++y)
    {
        Vec4b* ry = rmap.ptr<Vec4b>(y);
        Vec4b* ryp = rmap.ptr<Vec4b>(y + 1);
        Vec2s* spy = spmap.ptr<Vec2s>(y);
        Vec2s* spyp = spmap.ptr<Vec2s>(y + 1);
        for (int x = 0; x < ncols - 1; ++x)
        {
            r1 = ry[x];
            sp1 = spy[x];

            r2[0] = ry[x + 1];
            r2[1] = ryp[x];
            r2[2] = ryp[x + 1];
            r2[3] = ryp[x];

            sp2[0] = spy[x + 1];
            sp2[1] = spyp[x];
            sp2[2] = spyp[x + 1];
            sp2[3] = spyp[x];

            dr[0] = dist2(r1, r2[0]);
            dr[1] = dist2(r1, r2[1]);
            dr[2] = dist2(r1, r2[2]);
            dsp[0] = dist2(sp1, sp2[0]);
            dsp[1] = dist2(sp1, sp2[1]);
            dsp[2] = dist2(sp1, sp2[2]);

            r1 = ry[x + 1];
            sp1 = spy[x + 1];

            dr[3] = dist2(r1, r2[3]);
            dsp[3] = dist2(sp1, sp2[3]);

            g.addEdge(pix(y, x, ncols), pix(y, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
            g.addEdge(pix(y, x, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[1], dsp[1]));
            g.addEdge(pix(y, x, ncols), pix(y + 1, x + 1, ncols), SegmLinkVal(dr[2], dsp[2]));
            g.addEdge(pix(y, x + 1, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[3], dsp[3]));
        }
    }
    // The last column and the last row get their remaining vertical/horizontal links
    for (int y = 0; y < nrows - 1; ++y)
    {
        r1 = rmap.at<Vec4b>(y, ncols - 1);
        r2[0] = rmap.at<Vec4b>(y + 1, ncols - 1);
        sp1 = spmap.at<Vec2s>(y, ncols - 1);
        sp2[0] = spmap.at<Vec2s>(y + 1, ncols - 1);
        dr[0] = dist2(r1, r2[0]);
        dsp[0] = dist2(sp1, sp2[0]);
        g.addEdge(pix(y, ncols - 1, ncols), pix(y + 1, ncols - 1, ncols), SegmLinkVal(dr[0], dsp[0]));
    }
    for (int x = 0; x < ncols - 1; ++x)
    {
        r1 = rmap.at<Vec4b>(nrows - 1, x);
        r2[0] = rmap.at<Vec4b>(nrows - 1, x + 1);
        sp1 = spmap.at<Vec2s>(nrows - 1, x);
        sp2[0] = spmap.at<Vec2s>(nrows - 1, x + 1);
        dr[0] = dist2(r1, r2[0]);
        dsp[0] = dist2(sp1, sp2[0]);
        g.addEdge(pix(nrows - 1, x, ncols), pix(nrows - 1, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
    }

    DjSets comps(g.numv);

    // Merge adjacent components whose color and spatial distances fall within the window radii
    for (int v = 0; v < g.numv; ++v)
    {
        for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
        {
            int c1 = comps.find(v);
            int c2 = comps.find(g.edges[e_it].to);
            if (c1 != c2 && g.edges[e_it].val.dr < hr && g.edges[e_it].val.dsp < hsp)
                comps.merge(c1, c2);
        }
    }

    std::vector<SegmLink> edges;
    edges.reserve(g.numv);

    // Prepare edges connecting different components
    for (int v = 0; v < g.numv; ++v)
    {
        int c1 = comps.find(v);
        for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
        {
            int c2 = comps.find(g.edges[e_it].to);
            if (c1 != c2)
                edges.push_back(SegmLink(c1, c2, g.edges[e_it].val));
        }
    }

    // Sort all graph edges connecting different components in ascending order of their distance
    sort(edges.begin(), edges.end());

    // Exclude small components (starting from the nearest couple)
    for (size_t i = 0; i < edges.size(); ++i)
    {
        int c1 = comps.find(edges[i].from);
        int c2 = comps.find(edges[i].to);
        if (c1 != c2 && (comps.size[c1] < minsize || comps.size[c2] < minsize))
            comps.merge(c1, c2);
    }

    // Compute the sum of colors of the pixels belonging to each segment
    Mat h_src(src);
    std::vector<Vec4i> sumcols(nrows * ncols, Vec4i(0, 0, 0, 0));
    for (int y = 0; y < nrows; ++y)
    {
        Vec4b* h_srcy = h_src.ptr<Vec4b>(y);
        for (int x = 0; x < ncols; ++x)
        {
            int parent = comps.find(pix(y, x, ncols));
            Vec4b col = h_srcy[x];
            Vec4i& sumcol = sumcols[parent];
            sumcol[0] += col[0];
            sumcol[1] += col[1];
            sumcol[2] += col[2];
        }
    }

    // Create the final image: the color of each segment is the average color of its pixels
    dst.create(src.size(), src.type());

    for (int y = 0; y < nrows; ++y)
    {
        Vec4b* dsty = dst.ptr<Vec4b>(y);
        for (int x = 0; x < ncols; ++x)
        {
            int parent = comps.find(pix(y, x, ncols));
            const Vec4i& sumcol = sumcols[parent];
            Vec4b& dstcol = dsty[x];
            dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
            dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
            dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
            dstcol[3] = 255;
        }
    }
}

#endif // #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
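// Usage sketch (added; not part of the original sources). Parameter values are
// illustrative assumptions: sp/sr are the spatial and color window radii handed
// to meanShiftProc, minsize is the smallest surviving segment in pixels.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

static void meanShiftSegmentationExample()
{
    cv::Mat bgr = cv::imread("input.png");
    cv::Mat bgra;
    cv::cvtColor(bgr, bgra, CV_BGR2BGRA);            // the GPU path expects 8UC4 input

    cv::gpu::GpuMat d_src(bgra);
    cv::Mat segmented;                               // result is downloaded to host memory
    cv::gpu::meanShiftSegmentation(d_src, segmented, 20, 20, 50,
        cv::TermCriteria(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 5, 1));
    cv::imwrite("segmented.png", segmented);
}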
@@ -1,249 +0,0 @@
#include "precomp.hpp"

#if !defined HAVE_CUDA || defined(CUDA_DISABLER)

void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::ImagePyramid::build(const GpuMat&, int, Stream&) { throw_no_cuda(); }
void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_no_cuda(); }

#else // HAVE_CUDA

//////////////////////////////////////////////////////////////////////////////
// pyrDown

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    }
}}}

void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    // dispatch table indexed by [depth][channels - 1]; zero entries are unsupported combinations
    static const func_t funcs[6][4] =
    {
        {pyrDown_gpu<uchar>      , 0 /*pyrDown_gpu<uchar2>*/ , pyrDown_gpu<uchar3>      , pyrDown_gpu<uchar4>      },
        {0 /*pyrDown_gpu<schar>*/, 0 /*pyrDown_gpu<schar2>*/ , 0 /*pyrDown_gpu<schar3>*/, 0 /*pyrDown_gpu<schar4>*/},
        {pyrDown_gpu<ushort>     , 0 /*pyrDown_gpu<ushort2>*/, pyrDown_gpu<ushort3>     , pyrDown_gpu<ushort4>     },
        {pyrDown_gpu<short>      , 0 /*pyrDown_gpu<short2>*/ , pyrDown_gpu<short3>      , pyrDown_gpu<short4>      },
        {0 /*pyrDown_gpu<int>*/  , 0 /*pyrDown_gpu<int2>*/   , 0 /*pyrDown_gpu<int3>*/  , 0 /*pyrDown_gpu<int4>*/  },
        {pyrDown_gpu<float>      , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3>      , pyrDown_gpu<float4>      }
    };

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert(func != 0);

    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());

    func(src, dst, StreamAccessor::getStream(stream));
}


//////////////////////////////////////////////////////////////////////////////
// pyrUp

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    }
}}}

void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[6][4] =
    {
        {pyrUp_gpu<uchar>      , 0 /*pyrUp_gpu<uchar2>*/ , pyrUp_gpu<uchar3>      , pyrUp_gpu<uchar4>      },
        {0 /*pyrUp_gpu<schar>*/, 0 /*pyrUp_gpu<schar2>*/ , 0 /*pyrUp_gpu<schar3>*/, 0 /*pyrUp_gpu<schar4>*/},
        {pyrUp_gpu<ushort>     , 0 /*pyrUp_gpu<ushort2>*/, pyrUp_gpu<ushort3>     , pyrUp_gpu<ushort4>     },
        {pyrUp_gpu<short>      , 0 /*pyrUp_gpu<short2>*/ , pyrUp_gpu<short3>      , pyrUp_gpu<short4>      },
        {0 /*pyrUp_gpu<int>*/  , 0 /*pyrUp_gpu<int2>*/   , 0 /*pyrUp_gpu<int3>*/  , 0 /*pyrUp_gpu<int4>*/  },
        {pyrUp_gpu<float>      , 0 /*pyrUp_gpu<float2>*/ , pyrUp_gpu<float3>      , pyrUp_gpu<float4>      }
    };

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert(func != 0);

    dst.create(src.rows * 2, src.cols * 2, src.type());

    func(src, dst, StreamAccessor::getStream(stream));
}
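// Usage sketch (added; not in the original sources): one pyramid step down and
// back up. Sizes follow the create() calls above: down rounds up with (n + 1) / 2,
// up doubles exactly, so a round trip can differ by one row/column for odd sizes.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

static void pyrExample()
{
    cv::gpu::GpuMat d_src(cv::imread("input.png")), d_half, d_back;
    cv::gpu::pyrDown(d_src, d_half);   // (rows + 1) / 2 x (cols + 1) / 2
    cv::gpu::pyrUp(d_half, d_back);    // 2 * rows x 2 * cols of d_half
}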


//////////////////////////////////////////////////////////////////////////////
// ImagePyramid

namespace cv { namespace gpu { namespace cudev
{
    namespace pyramid
    {
        template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    }
}}}

void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream)
{
    using namespace cv::gpu::cudev::pyramid;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[6][4] =
    {
        {kernelDownsampleX2_gpu<uchar1>       , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3>      , kernelDownsampleX2_gpu<uchar4>      },
        {0 /*kernelDownsampleX2_gpu<char1>*/  , 0 /*kernelDownsampleX2_gpu<char2>*/  , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
        {kernelDownsampleX2_gpu<ushort1>      , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3>     , kernelDownsampleX2_gpu<ushort4>     },
        {0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
        {0 /*kernelDownsampleX2_gpu<int1>*/   , 0 /*kernelDownsampleX2_gpu<int2>*/   , 0 /*kernelDownsampleX2_gpu<int3>*/  , 0 /*kernelDownsampleX2_gpu<int4>*/  },
        {kernelDownsampleX2_gpu<float1>       , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3>      , kernelDownsampleX2_gpu<float4>      }
    };

    CV_Assert(img.depth() <= CV_32F && img.channels() <= 4);

    const func_t func = funcs[img.depth()][img.channels() - 1];
    CV_Assert(func != 0);

    layer0_ = img;
    Size szLastLayer = img.size();
    nLayers_ = 1;

    if (numLayers <= 0)
        numLayers = 255; // effectively unlimited: the loop below stops once a dimension reaches zero

    pyramid_.resize(numLayers);

    for (int i = 0; i < numLayers - 1; ++i)
    {
        Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);

        if (szCurLayer.width == 0 || szCurLayer.height == 0)
            break;

        ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
        nLayers_++;

        const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];

        func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));

        szLastLayer = szCurLayer;
    }
}

void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const
{
    using namespace cv::gpu::cudev::pyramid;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[6][4] =
    {
        {kernelInterpolateFrom1_gpu<uchar1>       , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3>      , kernelInterpolateFrom1_gpu<uchar4>      },
        {0 /*kernelInterpolateFrom1_gpu<char1>*/  , 0 /*kernelInterpolateFrom1_gpu<char2>*/  , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
        {kernelInterpolateFrom1_gpu<ushort1>      , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3>     , kernelInterpolateFrom1_gpu<ushort4>     },
        {0 /*kernelInterpolateFrom1_gpu<short1>*/ , 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
        {0 /*kernelInterpolateFrom1_gpu<int1>*/   , 0 /*kernelInterpolateFrom1_gpu<int2>*/   , 0 /*kernelInterpolateFrom1_gpu<int3>*/  , 0 /*kernelInterpolateFrom1_gpu<int4>*/  },
        {kernelInterpolateFrom1_gpu<float1>       , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3>      , kernelInterpolateFrom1_gpu<float4>      }
    };

    CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);

    ensureSizeIsEnough(outRoi, layer0_.type(), outImg);

    const func_t func = funcs[outImg.depth()][outImg.channels() - 1];
    CV_Assert(func != 0);

    if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
    {
        if (stream)
            stream.enqueueCopy(layer0_, outImg);
        else
            layer0_.copyTo(outImg);

        return; // exact match with the base layer: a plain copy is enough
    }

    float lastScale = 1.0f;
    float curScale;
    GpuMat lastLayer = layer0_;
    GpuMat curLayer;

    for (int i = 0; i < nLayers_ - 1; ++i)
    {
        curScale = lastScale * 0.5f;
        curLayer = pyramid_[i];

        if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
        {
            if (stream)
                stream.enqueueCopy(curLayer, outImg);
            else
                curLayer.copyTo(outImg);

            return; // exact match with a stored layer: a plain copy is enough
        }

        if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
            break;

        lastScale = curScale;
        lastLayer = curLayer;
    }

    func(lastLayer, outImg, StreamAccessor::getStream(stream));
}

#endif // HAVE_CUDA
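// Usage sketch (added; not in the original sources): build a pyramid once and
// pull out arbitrary intermediate resolutions; getLayer interpolates from the
// nearest not-smaller stored layer. numLayers = 4 is an arbitrary choice.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

static void imagePyramidExample()
{
    cv::gpu::GpuMat d_src(cv::imread("input.png"));
    cv::gpu::ImagePyramid pyr;
    pyr.build(d_src, 4);                              // layer0 plus up to 3 halvings

    cv::gpu::GpuMat d_small;
    pyr.getLayer(d_small, cv::Size(d_src.cols / 3, d_src.rows / 3));
}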
@@ -1,102 +0,0 @@

#include "precomp.hpp"

#if !defined HAVE_CUDA || defined(CUDA_DISABLER)

void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, Scalar, Stream&){ throw_no_cuda(); }

#else // HAVE_CUDA

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename T>
        void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
                       int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    }
}}}

void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
                           int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

    static const func_t funcs[6][4] =
    {
        {remap_gpu<uchar>      , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3>     , remap_gpu<uchar4>     },
        {0 /*remap_gpu<schar>*/, 0 /*remap_gpu<char2>*/  , 0 /*remap_gpu<char3>*/, 0 /*remap_gpu<char4>*/},
        {remap_gpu<ushort>     , 0 /*remap_gpu<ushort2>*/, remap_gpu<ushort3>    , remap_gpu<ushort4>    },
        {remap_gpu<short>      , 0 /*remap_gpu<short2>*/ , remap_gpu<short3>     , remap_gpu<short4>     },
        {0 /*remap_gpu<int>*/  , 0 /*remap_gpu<int2>*/   , 0 /*remap_gpu<int3>*/ , 0 /*remap_gpu<int4>*/ },
        {remap_gpu<float>      , 0 /*remap_gpu<float2>*/ , remap_gpu<float3>     , remap_gpu<float4>     }
    };

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);

    const func_t func = funcs[src.depth()][src.channels() - 1];
    CV_Assert(func != 0);

    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

    dst.create(xmap.size(), src.type());

    Scalar_<float> borderValueFloat;
    borderValueFloat = borderValue;

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
         dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
}

#endif // HAVE_CUDA
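// Usage sketch (added; not in the original sources): remap with explicit CV_32F
// maps, here a simple 10-pixel horizontal shift built on the CPU and uploaded.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

static void remapExample()
{
    cv::Mat img = cv::imread("input.png");
    cv::Mat xmap(img.size(), CV_32FC1), ymap(img.size(), CV_32FC1);
    for (int y = 0; y < img.rows; ++y)
        for (int x = 0; x < img.cols; ++x)
        {
            xmap.at<float>(y, x) = (float)(x - 10);   // sample 10 px to the left
            ymap.at<float>(y, x) = (float)y;          // rows are unchanged
        }

    cv::gpu::GpuMat d_src(img), d_xmap(xmap), d_ymap(ymap), d_dst;
    cv::gpu::remap(d_src, d_dst, d_xmap, d_ymap, cv::INTER_LINEAR, cv::BORDER_CONSTANT);
}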
@@ -1,162 +0,0 @@

#include "precomp.hpp"

#if !defined HAVE_CUDA || defined(CUDA_DISABLER)

void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
    (void)src;
    (void)dst;
    (void)dsize;
    (void)fx;
    (void)fy;
    (void)interpolation;
    (void)s;

    throw_no_cuda();
}

#else // HAVE_CUDA

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        template <typename T>
        void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
                        PtrStepSzb dst, int interpolation, cudaStream_t stream);
    }
}}}

void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
            || interpolation == INTER_CUBIC || interpolation == INTER_AREA);
    CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));

    if (dsize == Size())
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }
    if (dsize != dst.size())
        dst.create(dsize, src.type());

    if (dsize == src.size())
    {
        if (s)
            s.enqueueCopy(src, dst);
        else
            src.copyTo(dst);
        return;
    }

    cudaStream_t stream = StreamAccessor::getStream(s);

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    // NPP handles only 8U C1/C4 with nearest or linear interpolation;
    // everything else goes through the generic CUDA kernels below
    bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
    useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);

    if (useNpp)
    {
        typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst, int nDstStep, NppiSize dstROISize,
                                    double xFactor, double yFactor, int eInterpolation);

        const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R };

        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};

        NppiSize srcsz;
        srcsz.width = wholeSize.width;
        srcsz.height = wholeSize.height;

        NppiRect srcrect;
        srcrect.x = ofs.x;
        srcrect.y = ofs.y;
        srcrect.width = src.cols;
        srcrect.height = src.rows;

        NppiSize dstsz;
        dstsz.width = dst.cols;
        dstsz.height = dst.rows;

        NppStreamHandler h(stream);

        nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast<int>(src.step), srcrect,
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else
    {
        using namespace ::cv::gpu::cudev::imgproc;

        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        static const func_t funcs[6][4] =
        {
            {resize_gpu<uchar>      , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3>     , resize_gpu<uchar4>     },
            {0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/  , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
            {resize_gpu<ushort>     , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3>    , resize_gpu<ushort4>    },
            {resize_gpu<short>      , 0 /*resize_gpu<short2>*/ , resize_gpu<short3>     , resize_gpu<short4>     },
            {0 /*resize_gpu<int>*/  , 0 /*resize_gpu<int2>*/   , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
            {resize_gpu<float>      , 0 /*resize_gpu<float2>*/ , resize_gpu<float3>     , resize_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        // the kernels take the dst -> src scale, hence the inverse factors
        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
            static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
    }
}

#endif // HAVE_CUDA
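// Usage sketch (added; not in the original sources): both call styles accepted
// by the assertions above, either an explicit dsize or scale factors fx/fy.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

static void resizeExample()
{
    cv::gpu::GpuMat d_src(cv::imread("input.png")), d_a, d_b;
    cv::gpu::resize(d_src, d_a, cv::Size(640, 480), 0, 0, cv::INTER_LINEAR);  // explicit target size
    cv::gpu::resize(d_src, d_b, cv::Size(), 0.5, 0.5, cv::INTER_AREA);        // scale factors
}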
@@ -1,454 +0,0 @@

#include "precomp.hpp"

#if !defined HAVE_CUDA || defined(CUDA_DISABLER)

void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::gpu::buildWarpAffineMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }

void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::gpu::buildWarpPerspectiveMaps(const Mat&, bool, Size, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }

#else // HAVE_CUDA

namespace cv { namespace gpu { namespace cudev
{
    namespace imgproc
    {
        void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);

        template <typename T>
        void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
                            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);

        template <typename T>
        void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
                                 int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    }
}}}

void cv::gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    CV_Assert(M.rows == 2 && M.cols == 3);

    xmap.create(dsize, CV_32FC1);
    ymap.create(dsize, CV_32FC1);

    float coeffs[2 * 3];
    Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);

    // the kernel consumes the inverse (dst -> src) transform, so invert M
    // unless the caller states it is already inverted
    if (inverse)
        M.convertTo(coeffsMat, coeffsMat.type());
    else
    {
        cv::Mat iM;
        invertAffineTransform(M, iM);
        iM.convertTo(coeffsMat, coeffsMat.type());
    }

    buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}

void cv::gpu::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream)
{
    using namespace cv::gpu::cudev::imgproc;

    CV_Assert(M.rows == 3 && M.cols == 3);

    xmap.create(dsize, CV_32FC1);
    ymap.create(dsize, CV_32FC1);

    float coeffs[3 * 3];
    Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);

    if (inverse)
        M.convertTo(coeffsMat, coeffsMat.type());
    else
    {
        cv::Mat iM;
        invert(M, iM);
        iM.convertTo(coeffsMat, coeffsMat.type());
    }

    buildWarpPerspectiveMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}
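// Usage sketch (added; not in the original sources): precompute the maps once,
// then feed them to gpu::remap for repeated warps of same-sized frames. The
// rotation parameters are illustrative assumptions.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/imgproc/imgproc.hpp>

static void warpMapsExample(const cv::gpu::GpuMat& d_frame, cv::gpu::GpuMat& d_out)
{
    cv::Mat M = cv::getRotationMatrix2D(cv::Point2f(d_frame.cols / 2.f, d_frame.rows / 2.f), 30.0, 1.0);
    cv::gpu::GpuMat d_xmap, d_ymap;
    cv::gpu::buildWarpAffineMaps(M, false, d_frame.size(), d_xmap, d_ymap);   // false: M is the forward transform
    cv::gpu::remap(d_frame, d_out, d_xmap, d_ymap, cv::INTER_LINEAR);
}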

namespace
{
    template<int DEPTH> struct NppTypeTraits;
    template<> struct NppTypeTraits<CV_8U>  { typedef Npp8u npp_t; };
    template<> struct NppTypeTraits<CV_8S>  { typedef Npp8s npp_t; };
    template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
    template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; typedef Npp16sc npp_complex_type; };
    template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; typedef Npp32sc npp_complex_type; };
    template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; typedef Npp32fc npp_complex_type; };
    template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; typedef Npp64fc npp_complex_type; };

    template <int DEPTH> struct NppWarpFunc
    {
        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;

        typedef NppStatus (*func_t)(const npp_t* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_t* pDst,
                                    int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                    int interpolation);
    };

    template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
    {
        typedef typename NppWarpFunc<DEPTH>::npp_t npp_t;

        static void call(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
        {
            static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

            NppiSize srcsz;
            srcsz.height = src.rows;
            srcsz.width = src.cols;

            NppiRect srcroi;
            srcroi.x = 0;
            srcroi.y = 0;
            srcroi.height = src.rows;
            srcroi.width = src.cols;

            NppiRect dstroi;
            dstroi.x = 0;
            dstroi.y = 0;
            dstroi.height = dst.rows;
            dstroi.width = dst.cols;

            cv::gpu::NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<npp_t>(), srcsz, static_cast<int>(src.step), srcroi,
                              dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi,
                              coeffs, npp_inter[interpolation]) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}

void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& s)
{
    CV_Assert(M.rows == 2 && M.cols == 3);

    int interpolation = flags & INTER_MAX;

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);

    dst.create(dsize, src.type());

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    // which [depth][channels - 1][interpolation] combinations NPP can take
    static const bool useNppTab[6][4][3] =
    {
        {
            {false, false, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        }
    };

    bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
    // NPP bug on float data
    useNpp = useNpp && src.depth() != CV_32F;

    if (useNpp)
    {
        typedef void (*func_t)(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);

        static const func_t funcs[2][6][4] =
        {
            {
                {NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
            },
            {
                {NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
            }
        };

        dst.setTo(borderValue);

        double coeffs[2][3];
        Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
        M.convertTo(coeffsMat, coeffsMat.type());

        const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(s));
    }
    else
    {
        using namespace cv::gpu::cudev::imgproc;

        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
                               int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        static const func_t funcs[6][4] =
        {
            {warpAffine_gpu<uchar>      , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3>     , warpAffine_gpu<uchar4>     },
            {0 /*warpAffine_gpu<schar>*/, 0 /*warpAffine_gpu<char2>*/  , 0 /*warpAffine_gpu<char3>*/, 0 /*warpAffine_gpu<char4>*/},
            {warpAffine_gpu<ushort>     , 0 /*warpAffine_gpu<ushort2>*/, warpAffine_gpu<ushort3>    , warpAffine_gpu<ushort4>    },
            {warpAffine_gpu<short>      , 0 /*warpAffine_gpu<short2>*/ , warpAffine_gpu<short3>     , warpAffine_gpu<short4>     },
            {0 /*warpAffine_gpu<int>*/  , 0 /*warpAffine_gpu<int2>*/   , 0 /*warpAffine_gpu<int3>*/ , 0 /*warpAffine_gpu<int4>*/ },
            {warpAffine_gpu<float>      , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3>     , warpAffine_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        int gpuBorderType;
        CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

        float coeffs[2 * 3];
        Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);

        if (flags & WARP_INVERSE_MAP)
            M.convertTo(coeffsMat, coeffsMat.type());
        else
        {
            cv::Mat iM;
            invertAffineTransform(M, iM);
            iM.convertTo(coeffsMat, coeffsMat.type());
        }

        Scalar_<float> borderValueFloat;
        borderValueFloat = borderValue;

        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
             dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
    }
}
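// Usage sketch (added; not in the original sources): forward warp by a 2x3
// matrix; pass WARP_INVERSE_MAP in flags if M already maps dst -> src coordinates.
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/imgproc/imgproc.hpp>

static void warpAffineExample(const cv::gpu::GpuMat& d_src, cv::gpu::GpuMat& d_dst)
{
    cv::Mat M = (cv::Mat_<double>(2, 3) << 1, 0, 15,    // shift right by 15 px
                                           0, 1,  5);   // shift down by 5 px
    cv::gpu::warpAffine(d_src, d_dst, M, d_src.size(), cv::INTER_LINEAR);
}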

void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& s)
{
    CV_Assert(M.rows == 3 && M.cols == 3);

    int interpolation = flags & INTER_MAX;

    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);

    dst.create(dsize, src.type());

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    static const bool useNppTab[6][4][3] =
    {
        {
            {false, false, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        }
    };

    bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
    // NPP bug on float data
    useNpp = useNpp && src.depth() != CV_32F;

    if (useNpp)
    {
        typedef void (*func_t)(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);

        static const func_t funcs[2][6][4] =
        {
            {
                {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
            },
            {
                {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
            }
        };

        dst.setTo(borderValue);

        double coeffs[3][3];
        Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
        M.convertTo(coeffsMat, coeffsMat.type());

        const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(s));
    }
    else
    {
        using namespace cv::gpu::cudev::imgproc;

        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
                               int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        static const func_t funcs[6][4] =
        {
            {warpPerspective_gpu<uchar>      , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3>     , warpPerspective_gpu<uchar4>     },
            {0 /*warpPerspective_gpu<schar>*/, 0 /*warpPerspective_gpu<char2>*/  , 0 /*warpPerspective_gpu<char3>*/, 0 /*warpPerspective_gpu<char4>*/},
            {warpPerspective_gpu<ushort>     , 0 /*warpPerspective_gpu<ushort2>*/, warpPerspective_gpu<ushort3>    , warpPerspective_gpu<ushort4>    },
            {warpPerspective_gpu<short>      , 0 /*warpPerspective_gpu<short2>*/ , warpPerspective_gpu<short3>     , warpPerspective_gpu<short4>     },
            {0 /*warpPerspective_gpu<int>*/  , 0 /*warpPerspective_gpu<int2>*/   , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
            {warpPerspective_gpu<float>      , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3>     , warpPerspective_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        int gpuBorderType;
        CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

        float coeffs[3 * 3];
        Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);

        if (flags & WARP_INVERSE_MAP)
            M.convertTo(coeffsMat, coeffsMat.type());
        else
        {
            cv::Mat iM;
            invert(M, iM);
            iM.convertTo(coeffsMat, coeffsMat.type());
        }

        Scalar_<float> borderValueFloat;
        borderValueFloat = borderValue;

        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
             dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), deviceSupports(FEATURE_SET_COMPUTE_20));
    }
}

#endif // HAVE_CUDA
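// Usage sketch (added; not in the original sources): full 3x3 homography warp,
// e.g. with a matrix estimated by cv::findHomography; H is an assumed input.
#include <opencv2/gpu/gpu.hpp>

static void warpPerspectiveExample(const cv::gpu::GpuMat& d_src, cv::gpu::GpuMat& d_dst, const cv::Mat& H)
{
    CV_Assert(H.rows == 3 && H.cols == 3);
    cv::gpu::warpPerspective(d_src, d_dst, H, d_src.size(), cv::INTER_LINEAR, cv::BORDER_CONSTANT);
}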