renamed gpuwarping -> cudawarping
modules/cudawarping/src/cuda/build_warp_maps.cu (new file, 221 lines)
@@ -0,0 +1,221 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
        // TODO use intrinsics like __sinf and so on

        namespace build_warp_maps
        {
            __constant__ float ck_rinv[9];
            __constant__ float cr_kinv[9];
            __constant__ float ct[3];
            __constant__ float cscale;
        }

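        // Backward mappers: each one maps a point (u, v) in the warped destination plane
        // back to source-image coordinates (x, y) using the camera parameters stored in
        // the constant-memory arrays above (uploaded by the host wrappers below via
        // cudaMemcpyToSymbol).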
        class PlaneMapper
        {
        public:
            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
            {
                using namespace build_warp_maps;

                float x_ = u / cscale - ct[0];
                float y_ = v / cscale - ct[1];

                float z;
                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);

                x /= z;
                y /= z;
            }
        };


        class CylindricalMapper
        {
        public:
            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
            {
                using namespace build_warp_maps;

                u /= cscale;
                float x_ = ::sinf(u);
                float y_ = v / cscale;
                float z_ = ::cosf(u);

                float z;
                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;

                if (z > 0) { x /= z; y /= z; }
                else x = y = -1;
            }
        };


        class SphericalMapper
        {
        public:
            static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
            {
                using namespace build_warp_maps;

                v /= cscale;
                u /= cscale;

                float sinv = ::sinf(v);
                float x_ = sinv * ::sinf(u);
                float y_ = -::cosf(v);
                float z_ = sinv * ::cosf(u);

                float z;
                x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
                y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
                z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;

                if (z > 0) { x /= z; y /= z; }
                else x = y = -1;
            }
        };

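        // One thread per destination pixel: (tl_u, tl_v) is the top-left corner of the
        // destination ROI, and the chosen Mapper writes the corresponding source
        // coordinates into map_x / map_y (suitable for a subsequent remap).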
        template <typename Mapper>
        __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
                                            PtrStepf map_x, PtrStepf map_y)
        {
            int du = blockIdx.x * blockDim.x + threadIdx.x;
            int dv = blockIdx.y * blockDim.y + threadIdx.y;
            if (du < cols && dv < rows)
            {
                float u = tl_u + du;
                float v = tl_v + dv;
                float x, y;
                Mapper::mapBackward(u, v, x, y);
                map_x.ptr(dv)[du] = x;
                map_y.ptr(dv)[du] = y;
            }
        }


        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                const float k_rinv[9], const float r_kinv[9], const float t[3],
                                float scale, cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

            int cols = map_x.cols;
            int rows = map_x.rows;

            dim3 threads(32, 8);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            buildWarpMapsKernel<PlaneMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }


        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                      const float k_rinv[9], const float r_kinv[9], float scale,
                                      cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

            int cols = map_x.cols;
            int rows = map_x.rows;

            dim3 threads(32, 8);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            buildWarpMapsKernel<CylindricalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }


        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                    const float k_rinv[9], const float r_kinv[9], float scale,
                                    cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

            int cols = map_x.cols;
            int rows = map_x.rows;

            dim3 threads(32, 8);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            buildWarpMapsKernel<SphericalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev {


#endif /* CUDA_DISABLER */
modules/cudawarping/src/cuda/pyr_down.cu (new file, 228 lines)
@@ -0,0 +1,228 @@
/* OpenCV license header, identical to the one at the top of build_warp_maps.cu above */

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
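        // pyrDown: separable 5-tap Gaussian (1/16, 4/16, 6/16, 4/16, 1/16) followed by
        // decimation. Each 256-thread block handles one destination row: the vertical pass
        // is written to shared memory with a 2-pixel apron on each side, and after the
        // barrier every other column is filtered horizontally to produce one output pixel.
        // The border object b is only consulted for threads that read outside the image.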
        template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t;

            __shared__ work_t smem[256 + 4];

            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y;

            const int src_y = 2 * y;

            if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2)
            {
                {
                    work_t sum;

                    sum = 0.0625f * src(src_y - 2, x);
                    sum = sum + 0.25f   * src(src_y - 1, x);
                    sum = sum + 0.375f  * src(src_y    , x);
                    sum = sum + 0.25f   * src(src_y + 1, x);
                    sum = sum + 0.0625f * src(src_y + 2, x);

                    smem[2 + threadIdx.x] = sum;
                }

                if (threadIdx.x < 2)
                {
                    const int left_x = x - 2;

                    work_t sum;

                    sum = 0.0625f * src(src_y - 2, left_x);
                    sum = sum + 0.25f   * src(src_y - 1, left_x);
                    sum = sum + 0.375f  * src(src_y    , left_x);
                    sum = sum + 0.25f   * src(src_y + 1, left_x);
                    sum = sum + 0.0625f * src(src_y + 2, left_x);

                    smem[threadIdx.x] = sum;
                }

                if (threadIdx.x > 253)
                {
                    const int right_x = x + 2;

                    work_t sum;

                    sum = 0.0625f * src(src_y - 2, right_x);
                    sum = sum + 0.25f   * src(src_y - 1, right_x);
                    sum = sum + 0.375f  * src(src_y    , right_x);
                    sum = sum + 0.25f   * src(src_y + 1, right_x);
                    sum = sum + 0.0625f * src(src_y + 2, right_x);

                    smem[4 + threadIdx.x] = sum;
                }
            }
            else
            {
                {
                    work_t sum;

                    sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
                    sum = sum + 0.25f   * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
                    sum = sum + 0.375f  * src(src_y                    , b.idx_col_high(x));
                    sum = sum + 0.25f   * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));

                    smem[2 + threadIdx.x] = sum;
                }

                if (threadIdx.x < 2)
                {
                    const int left_x = x - 2;

                    work_t sum;

                    sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
                    sum = sum + 0.25f   * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
                    sum = sum + 0.375f  * src(src_y                    , b.idx_col(left_x));
                    sum = sum + 0.25f   * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));

                    smem[threadIdx.x] = sum;
                }

                if (threadIdx.x > 253)
                {
                    const int right_x = x + 2;

                    work_t sum;

                    sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
                    sum = sum + 0.25f   * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
                    sum = sum + 0.375f  * src(src_y                    , b.idx_col_high(right_x));
                    sum = sum + 0.25f   * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));

                    smem[4 + threadIdx.x] = sum;
                }
            }

            __syncthreads();

            if (threadIdx.x < 128)
            {
                const int tid2 = threadIdx.x * 2;

                work_t sum;

                sum = 0.0625f * smem[2 + tid2 - 2];
                sum = sum + 0.25f   * smem[2 + tid2 - 1];
                sum = sum + 0.375f  * smem[2 + tid2    ];
                sum = sum + 0.25f   * smem[2 + tid2 + 1];
                sum = sum + 0.0625f * smem[2 + tid2 + 2];

                const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;

                if (dst_x < dst_cols)
                    dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
            }
        }

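        // Host-side launcher: one 256-thread block per destination row. Only the
        // BrdReflect101 border mode is instantiated below, matching cv::pyrDown's
        // default BORDER_REFLECT_101 handling.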
        template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
        {
            const dim3 block(256);
            const dim3 grid(divUp(src.cols, block.x), dst.rows);

            B<T> b(src.rows, src.cols);

            pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev


#endif /* CUDA_DISABLER */
modules/cudawarping/src/cuda/pyr_up.cu (new file, 196 lines)
@@ -0,0 +1,196 @@
/* OpenCV license header, identical to the one at the top of build_warp_maps.cu above */

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
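        // pyrUp: each 16x16 block produces a 16x16 tile of the upsampled image. A 10x10
        // patch of the source (the corresponding 8x8 region plus a one-pixel border,
        // clamped to the image) is staged in shared memory, filtered horizontally into the
        // 20x16 s_dstPatch (odd rows stay zero, as in zero-inserted upsampling), then
        // filtered vertically; the final multiply by 4 restores unit gain.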
        template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            __shared__ sum_t s_srcPatch[10][10];
            __shared__ sum_t s_dstPatch[20][16];

            if (threadIdx.x < 10 && threadIdx.y < 10)
            {
                int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
                int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;

                srcx = ::abs(srcx);
                srcx = ::min(src.cols - 1, srcx);

                srcy = ::abs(srcy);
                srcy = ::min(src.rows - 1, srcy);

                s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
            }

            __syncthreads();

            sum_t sum = VecTraits<sum_t>::all(0);

            const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
            const int oddFlag  = static_cast<int>((threadIdx.x & 1) != 0);
            const bool eveny = ((threadIdx.y & 1) == 0);
            const int tidx = threadIdx.x;

            if (eveny)
            {
                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
                sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx    ) >> 1)];
                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
            }

            s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;

            if (threadIdx.y < 2)
            {
                sum = VecTraits<sum_t>::all(0);

                if (eveny)
                {
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
                    sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx    ) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
                }

                s_dstPatch[threadIdx.y][threadIdx.x] = sum;
            }

            if (threadIdx.y > 13)
            {
                sum = VecTraits<sum_t>::all(0);

                if (eveny)
                {
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
                    sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx    ) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
                }

                s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
            }

            __syncthreads();

            sum = VecTraits<sum_t>::all(0);

            const int tidy = threadIdx.y;

            sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
            sum = sum + 0.25f   * s_dstPatch[2 + tidy - 1][threadIdx.x];
            sum = sum + 0.375f  * s_dstPatch[2 + tidy    ][threadIdx.x];
            sum = sum + 0.25f   * s_dstPatch[2 + tidy + 1][threadIdx.x];
            sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];

            if (x < dst.cols && y < dst.rows)
                dst(y, x) = saturate_cast<T>(4.0f * sum);
        }

        template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
        {
            const dim3 block(16, 16);
            const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

            pyrUp<<<grid, block, 0, stream>>>(src, dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev

#endif /* CUDA_DISABLER */
modules/cudawarping/src/cuda/remap.cu (new file, 274 lines)
@@ -0,0 +1,274 @@
/* OpenCV license header, identical to the one at the top of build_warp_maps.cu above */

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
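        // remap: for every destination pixel (x, y) the kernel reads the source coordinates
        // from mapx/mapy and samples the source through the Ptr2D object, which already
        // bundles the interpolation filter and border handling selected by the dispatchers
        // below.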
        template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float xcoo = mapx.ptr(y)[x];
                const float ycoo = mapy.ptr(y)[x];

                dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
            }
        }

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
        {
            static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

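        // For the most common element types the non-stream path is specialized to read the
        // source through a texture reference instead of global memory. The generated
        // tex_remap_<type>_reader applies the (xoff, yoff) ROI offset, and the BrdReplicate
        // specialization skips the border wrapper entirely when the ROI covers the whole
        // image (the texture's clamp addressing already replicates the border).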
        #define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
            texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
            struct tex_remap_ ## type ## _reader \
            { \
                typedef type elem_type; \
                typedef int index_type; \
                int xoff, yoff; \
                tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
                { \
                    return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
                } \
            }; \
            template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
                                 PtrStepSz< type > dst, const float* borderValue, bool cc20) \
                { \
                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
                    dim3 block(32, cc20 ? 8 : 4); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_remap_ ## type , srcWhole); \
                    tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                    B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
                    BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
                    Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            }; \
            template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
                                 PtrStepSz< type > dst, const float*, bool) \
                { \
                    dim3 block(32, 8); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_remap_ ## type , srcWhole); \
                    tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                    if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                    { \
                        Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                    } \
                    else \
                    { \
                        BrdReplicate<type> brd(src.rows, src.cols); \
                        BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
                        Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                    } \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            };

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_REMAP_TEX

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
                             PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
            {
                if (stream == 0)
                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
                else
                    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
            }
        };

        template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
                                             PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
                                     PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);

            static const caller_t callers[3][5] =
            {
                {
                    RemapDispatcher<PointFilter, BrdConstant, T>::call,
                    RemapDispatcher<PointFilter, BrdReplicate, T>::call,
                    RemapDispatcher<PointFilter, BrdReflect, T>::call,
                    RemapDispatcher<PointFilter, BrdWrap, T>::call,
                    RemapDispatcher<PointFilter, BrdReflect101, T>::call
                },
                {
                    RemapDispatcher<LinearFilter, BrdConstant, T>::call,
                    RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
                    RemapDispatcher<LinearFilter, BrdReflect, T>::call,
                    RemapDispatcher<LinearFilter, BrdWrap, T>::call,
                    RemapDispatcher<LinearFilter, BrdReflect101, T>::call
                },
                {
                    RemapDispatcher<CubicFilter, BrdConstant, T>::call,
                    RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
                    RemapDispatcher<CubicFilter, BrdReflect, T>::call,
                    RemapDispatcher<CubicFilter, BrdWrap, T>::call,
                    RemapDispatcher<CubicFilter, BrdReflect101, T>::call
                }
            };

            callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
                                               static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
        }

        template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        //template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        //template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    } // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev


#endif /* CUDA_DISABLER */
modules/cudawarping/src/cuda/resize.cu (new file, 482 lines)
@@ -0,0 +1,482 @@
/* OpenCV license header, identical to the one at the top of build_warp_maps.cu above */

#if !defined CUDA_DISABLER

#include <cfloat>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"

namespace cv { namespace cuda { namespace device
{
    // kernels
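    // In all of the kernels below fx and fy are destination-to-source scale factors
    // (src_size / dst_size): destination pixel (dst_x, dst_y) samples the source at
    // (dst_x * fx, dst_y * fy), rounded or filtered according to the interpolation mode.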
    template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx)
    {
        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;

        if (dst_x < dst.cols && dst_y < dst.rows)
        {
            const float src_x = dst_x * fx;
            const float src_y = dst_y * fy;

            dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
        }
    }

    template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx)
    {
        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;

        if (dst_x < dst.cols && dst_y < dst.rows)
        {
            const float src_x = dst_x * fx;
            const float src_y = dst_y * fy;

            work_type out = VecTraits<work_type>::all(0);

            const int x1 = __float2int_rd(src_x);
            const int y1 = __float2int_rd(src_y);
            const int x2 = x1 + 1;
            const int y2 = y1 + 1;
            const int x2_read = ::min(x2, src.cols - 1);
            const int y2_read = ::min(y2, src.rows - 1);

            T src_reg = src(y1, x1);
            out = out + src_reg * ((x2 - src_x) * (y2 - src_y));

            src_reg = src(y1, x2_read);
            out = out + src_reg * ((src_x - x1) * (y2 - src_y));

            src_reg = src(y2_read, x1);
            out = out + src_reg * ((x2 - src_x) * (src_y - y1));

            src_reg = src(y2_read, x2_read);
            out = out + src_reg * ((src_x - x1) * (src_y - y1));

            dst(dst_y, dst_x) = saturate_cast<T>(out);
        }
    }

    template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
    {
        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;

        if (dst_x < dst.cols && dst_y < dst.rows)
        {
            const float src_x = dst_x * fx;
            const float src_y = dst_y * fy;

            dst(dst_y, dst_x) = src(src_y, src_x);
        }
    }

    template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
    {
        const int x = blockDim.x * blockIdx.x + threadIdx.x;
        const int y = blockDim.y * blockIdx.y + threadIdx.y;

        if (x < dst.cols && y < dst.rows)
        {
            dst(y, x) = src(y, x);
        }
    }

    // textures

    template <typename T> struct TextureAccessor;

    #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
        texture<type, cudaTextureType2D, cudaReadModeElementType> tex_resize_##type (0, cudaFilterModePoint, cudaAddressModeClamp); \
        template <> struct TextureAccessor<type> \
        { \
            typedef type elem_type; \
            typedef int index_type; \
            int xoff; \
            int yoff; \
            __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
            { \
                return tex2D(tex_resize_##type, x + xoff, y + yoff); \
            } \
            __host__ static void bind(const PtrStepSz<type>& mat) \
            { \
                bindTexture(&tex_resize_##type, mat); \
            } \
        };

    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)

    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)

    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)

    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)

    #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX

    template <typename T>
    TextureAccessor<T> texAccessor(const PtrStepSz<T>& mat, int yoff, int xoff)
    {
        TextureAccessor<T>::bind(mat);

        TextureAccessor<T> t;
        t.xoff = xoff;
        t.yoff = yoff;

        return t;
    }

    // callers for nearest interpolation

    template <typename T>
    void call_resize_nearest_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        resize_nearest<<<grid, block, 0, stream>>>(src, dst, fy, fx);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template <typename T>
    void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        resize<<<grid, block>>>(texAccessor(srcWhole, yoff, xoff), dst, fy, fx);
        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );
    }

    // callers for linear interpolation

    template <typename T>
    void call_resize_linear_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        resize_linear<<<grid, block>>>(src, dst, fy, fx);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template <typename T>
    void call_resize_linear_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        if (srcWhole.data == src.data)
        {
            TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
            LinearFilter< TextureAccessor<T> > filteredSrc(texSrc);

            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
        }
        else
        {
            TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);

            BrdReplicate<T> brd(src.rows, src.cols);
            BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
            LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
        }

        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );
    }

    // callers for cubic interpolation

    template <typename T>
    void call_resize_cubic_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        BrdReplicate<T> brd(src.rows, src.cols);
        BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
        CubicFilter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

        resize<<<grid, block, 0, stream>>>(filteredSrc, dst, fy, fx);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template <typename T>
    void call_resize_cubic_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        if (srcWhole.data == src.data)
        {
            TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
            CubicFilter< TextureAccessor<T> > filteredSrc(texSrc);

            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
        }
        else
        {
            TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);

            BrdReplicate<T> brd(src.rows, src.cols);
            BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
            CubicFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
        }

        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );
    }

    // ResizeNearestDispatcher

    template <typename T> struct ResizeNearestDispatcher
    {
        static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
        {
            call_resize_nearest_glob(src, dst, fy, fx, stream);
        }
    };

template <typename T> struct SelectImplForNearest
|
||||
{
|
||||
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
|
||||
{
|
||||
if (stream)
|
||||
call_resize_nearest_glob(src, dst, fy, fx, stream);
|
||||
else
|
||||
{
|
||||
if (fx > 1 || fy > 1)
|
||||
call_resize_nearest_glob(src, dst, fy, fx, 0);
|
||||
else
|
||||
call_resize_nearest_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
|
||||
}
|
||||
}
|
||||
};
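// Selection logic for the specializations below: the texture-based caller is used only on the
// default (null) stream and only when the image is not being shrunk (fx and fy here are
// source-to-destination ratios, see the area fallback further down, so fx <= 1 && fy <= 1 means
// same size or upscaling). Stream-based and downscaling calls take the plain global-memory
// kernel, presumably because the legacy texture binding is global state rather than per-stream.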
|
||||
|
||||
template <> struct ResizeNearestDispatcher<uchar> : SelectImplForNearest<uchar> {};
|
||||
template <> struct ResizeNearestDispatcher<uchar4> : SelectImplForNearest<uchar4> {};
|
||||
|
||||
template <> struct ResizeNearestDispatcher<ushort> : SelectImplForNearest<ushort> {};
|
||||
template <> struct ResizeNearestDispatcher<ushort4> : SelectImplForNearest<ushort4> {};
|
||||
|
||||
template <> struct ResizeNearestDispatcher<short> : SelectImplForNearest<short> {};
|
||||
template <> struct ResizeNearestDispatcher<short4> : SelectImplForNearest<short4> {};
|
||||
|
||||
template <> struct ResizeNearestDispatcher<float> : SelectImplForNearest<float> {};
|
||||
template <> struct ResizeNearestDispatcher<float4> : SelectImplForNearest<float4> {};
|
||||
|
||||
// ResizeLinearDispatcher
|
||||
|
||||
template <typename T> struct ResizeLinearDispatcher
|
||||
{
|
||||
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
|
||||
{
|
||||
call_resize_linear_glob(src, dst, fy, fx, stream);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct SelectImplForLinear
|
||||
{
|
||||
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
|
||||
{
|
||||
if (stream)
|
||||
call_resize_linear_glob(src, dst, fy, fx, stream);
|
||||
else
|
||||
{
|
||||
if (fx > 1 || fy > 1)
|
||||
call_resize_linear_glob(src, dst, fy, fx, 0);
|
||||
else
|
||||
call_resize_linear_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct ResizeLinearDispatcher<uchar> : SelectImplForLinear<uchar> {};
|
||||
template <> struct ResizeLinearDispatcher<uchar4> : SelectImplForLinear<uchar4> {};
|
||||
|
||||
template <> struct ResizeLinearDispatcher<ushort> : SelectImplForLinear<ushort> {};
|
||||
template <> struct ResizeLinearDispatcher<ushort4> : SelectImplForLinear<ushort4> {};
|
||||
|
||||
template <> struct ResizeLinearDispatcher<short> : SelectImplForLinear<short> {};
|
||||
template <> struct ResizeLinearDispatcher<short4> : SelectImplForLinear<short4> {};
|
||||
|
||||
template <> struct ResizeLinearDispatcher<float> : SelectImplForLinear<float> {};
|
||||
template <> struct ResizeLinearDispatcher<float4> : SelectImplForLinear<float4> {};
|
||||
|
||||
// ResizeCubicDispatcher
|
||||
|
||||
template <typename T> struct ResizeCubicDispatcher
|
||||
{
|
||||
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
|
||||
{
|
||||
call_resize_cubic_glob(src, dst, fy, fx, stream);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct SelectImplForCubic
|
||||
{
|
||||
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
|
||||
{
|
||||
if (stream)
|
||||
call_resize_cubic_glob(src, dst, fy, fx, stream);
|
||||
else
|
||||
call_resize_cubic_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct ResizeCubicDispatcher<uchar> : SelectImplForCubic<uchar> {};
|
||||
template <> struct ResizeCubicDispatcher<uchar4> : SelectImplForCubic<uchar4> {};
|
||||
|
||||
template <> struct ResizeCubicDispatcher<ushort> : SelectImplForCubic<ushort> {};
|
||||
template <> struct ResizeCubicDispatcher<ushort4> : SelectImplForCubic<ushort4> {};
|
||||
|
||||
template <> struct ResizeCubicDispatcher<short> : SelectImplForCubic<short> {};
|
||||
template <> struct ResizeCubicDispatcher<short4> : SelectImplForCubic<short4> {};
|
||||
|
||||
template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {};
|
||||
template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {};
|
||||
|
||||
// ResizeAreaDispatcher
|
||||
|
||||
template <typename T> struct ResizeAreaDispatcher
|
||||
{
|
||||
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>&, int, int, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
|
||||
{
|
||||
const int iscale_x = (int) round(fx);
|
||||
const int iscale_y = (int) round(fy);
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
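// Note: the comparison below uses FLT_MIN as its tolerance, so the IntegerAreaFilter branch is
// taken only when fx and fy are numerically exact integers; any fractional scale falls through
// to the general AreaFilter.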
|
||||
|
||||
if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
|
||||
{
|
||||
BrdConstant<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
|
||||
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
|
||||
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
BrdConstant<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
|
||||
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
|
||||
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
// resize
|
||||
|
||||
template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);
|
||||
static const func_t funcs[4] =
|
||||
{
|
||||
ResizeNearestDispatcher<T>::call,
|
||||
ResizeLinearDispatcher<T>::call,
|
||||
ResizeCubicDispatcher<T>::call,
|
||||
ResizeAreaDispatcher<T>::call
|
||||
};
|
||||
|
||||
// fall back to linear interpolation when area interpolation is requested for upscaling
|
||||
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
|
||||
interpolation = 1;
|
||||
|
||||
funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
|
||||
}
|
||||
|
||||
template void resize<uchar >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<uchar3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<uchar4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize<ushort >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<ushort3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<ushort4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize<short >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<short3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<short4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize<float >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<float3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
template void resize<float4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
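A minimal host-side sketch of how this kernel file is reached (an assumed example, not part of the commit; the function name resize_example is made up): cv::cuda::resize in modules/cudawarping/src/resize.cpp forwards to the device::resize entry point above, and the OpenCV interpolation flags (INTER_NEAREST = 0, INTER_LINEAR = 1, INTER_CUBIC = 2, INTER_AREA = 3) index directly into the funcs[4] dispatch table.

#include <opencv2/core/cuda.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/cudawarping.hpp>

void resize_example(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, cv::cuda::Stream& stream)
{
    // Halve the image. INTER_AREA selects ResizeAreaDispatcher; had this been an upscale,
    // the dispatcher above would silently switch to linear interpolation.
    cv::cuda::resize(src, dst, cv::Size(), 0.5, 0.5, cv::INTER_AREA, stream);
}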
|
389
modules/cudawarping/src/cuda/warp.cu
Normal file
@@ -0,0 +1,389 @@
// (standard OpenCV BSD license header omitted; identical across all files in this commit)
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/filters.hpp"
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
__constant__ float c_warpMat[3 * 3];
|
||||
|
||||
struct AffineTransform
|
||||
{
|
||||
static __device__ __forceinline__ float2 calcCoord(int x, int y)
|
||||
{
|
||||
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
|
||||
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
|
||||
|
||||
return make_float2(xcoo, ycoo);
|
||||
}
|
||||
};
|
||||
|
||||
struct PerspectiveTransform
|
||||
{
|
||||
static __device__ __forceinline__ float2 calcCoord(int x, int y)
|
||||
{
|
||||
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
|
||||
|
||||
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
|
||||
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
|
||||
|
||||
return make_float2(xcoo, ycoo);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Build Maps
|
||||
|
||||
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < xmap.cols && y < xmap.rows)
|
||||
{
|
||||
const float2 coord = Transform::calcCoord(x, y);
|
||||
|
||||
xmap(y, x) = coord.x;
|
||||
ymap(y, x) = coord.y;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
|
||||
|
||||
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||
|
||||
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
|
||||
}
|
||||
|
||||
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||
|
||||
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Warp
|
||||
|
||||
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
const float2 coord = Transform::calcCoord(x, y);
|
||||
|
||||
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
|
||||
}
|
||||
}
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, bool)
|
||||
{
|
||||
(void)xoff;
|
||||
(void)yoff;
|
||||
(void)srcWhole;
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
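// The OPENCV_GPU_IMPLEMENT_WARP_TEX macro below overrides WarpDispatcherNonStream for the listed
// pixel types so that the synchronous path samples the source through a bound texture reference
// (tex2D) instead of plain global loads; the extra BrdReplicate specialization additionally drops
// the BorderReader when the ROI covers the whole image.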
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
|
||||
texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||
struct tex_warp_ ## type ## _reader \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, bool cc20) \
|
||||
{ \
|
||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||
dim3 block(32, cc20 ? 8 : 4); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_warp_ ## type , srcWhole); \
|
||||
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
}; \
|
||||
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, bool) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_warp_ ## type , srcWhole); \
|
||||
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate<type> brd(src.rows, src.cols); \
|
||||
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
if (stream == 0)
|
||||
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc20);
|
||||
else
|
||||
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc20);
|
||||
}
|
||||
};
|
||||
|
||||
template <class Transform, typename T>
|
||||
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
static const func_t funcs[3][5] =
|
||||
{
|
||||
{
|
||||
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call
|
||||
},
|
||||
{
|
||||
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call
|
||||
},
|
||||
{
|
||||
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call
|
||||
}
|
||||
};
|
||||
|
||||
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
|
||||
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||
|
||||
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||
|
||||
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace cuda { namespace device
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
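For reference, a hedged host-side sketch of how the 2x3 matrix reaches c_warpMat (an assumed example, not part of the commit; warp_example is a made-up name): the warpAffine_gpu wrapper above copies the coefficients into the constant-memory symbol with cudaMemcpyToSymbol, and the kernel then evaluates AffineTransform::calcCoord once per output pixel.

#include <opencv2/imgproc.hpp>
#include <opencv2/cudawarping.hpp>

void warp_example(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst)
{
    // Translate by (10, 20) pixels, replicating border pixels outside the source.
    cv::Mat M = (cv::Mat_<double>(2, 3) << 1, 0, 10,
                                           0, 1, 20);
    cv::cuda::warpAffine(src, dst, M, src.size(), cv::INTER_LINEAR, cv::BORDER_REPLICATE);
}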
|
43
modules/cudawarping/src/precomp.cpp
Normal file
@@ -0,0 +1,43 @@
// (standard OpenCV BSD license header omitted; identical across all files in this commit)
#include "precomp.hpp"
|
57
modules/cudawarping/src/precomp.hpp
Normal file
@@ -0,0 +1,57 @@
// (standard OpenCV BSD license header omitted; identical across all files in this commit)
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__

#include "opencv2/cudawarping.hpp"

#include "opencv2/core/private.cuda.hpp"

#include "opencv2/opencv_modules.hpp"

#ifdef HAVE_OPENCV_CUDALEGACY
# include "opencv2/cudalegacy.hpp"
# include "opencv2/cudalegacy/private.hpp"
#endif

#endif /* __OPENCV_PRECOMP_H__ */
|
244
modules/cudawarping/src/pyramids.cpp
Normal file
@@ -0,0 +1,244 @@
// (standard OpenCV BSD license header omitted; identical across all files in this commit)
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::cuda::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// pyrDown
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
using namespace cv::cuda::device::imgproc;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
{pyrDown_gpu<uchar> , 0 /*pyrDown_gpu<uchar2>*/ , pyrDown_gpu<uchar3> , pyrDown_gpu<uchar4> },
|
||||
{0 /*pyrDown_gpu<schar>*/, 0 /*pyrDown_gpu<schar2>*/ , 0 /*pyrDown_gpu<schar3>*/, 0 /*pyrDown_gpu<schar4>*/},
|
||||
{pyrDown_gpu<ushort> , 0 /*pyrDown_gpu<ushort2>*/, pyrDown_gpu<ushort3> , pyrDown_gpu<ushort4> },
|
||||
{pyrDown_gpu<short> , 0 /*pyrDown_gpu<short2>*/ , pyrDown_gpu<short3> , pyrDown_gpu<short4> },
|
||||
{0 /*pyrDown_gpu<int>*/ , 0 /*pyrDown_gpu<int2>*/ , 0 /*pyrDown_gpu<int3>*/ , 0 /*pyrDown_gpu<int4>*/ },
|
||||
{pyrDown_gpu<float> , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3> , pyrDown_gpu<float4> }
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
|
||||
|
||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
_dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
func(src, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// pyrUp
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
{
|
||||
using namespace cv::cuda::device::imgproc;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
{pyrUp_gpu<uchar> , 0 /*pyrUp_gpu<uchar2>*/ , pyrUp_gpu<uchar3> , pyrUp_gpu<uchar4> },
|
||||
{0 /*pyrUp_gpu<schar>*/, 0 /*pyrUp_gpu<schar2>*/ , 0 /*pyrUp_gpu<schar3>*/, 0 /*pyrUp_gpu<schar4>*/},
|
||||
{pyrUp_gpu<ushort> , 0 /*pyrUp_gpu<ushort2>*/, pyrUp_gpu<ushort3> , pyrUp_gpu<ushort4> },
|
||||
{pyrUp_gpu<short> , 0 /*pyrUp_gpu<short2>*/ , pyrUp_gpu<short3> , pyrUp_gpu<short4> },
|
||||
{0 /*pyrUp_gpu<int>*/ , 0 /*pyrUp_gpu<int2>*/ , 0 /*pyrUp_gpu<int3>*/ , 0 /*pyrUp_gpu<int4>*/ },
|
||||
{pyrUp_gpu<float> , 0 /*pyrUp_gpu<float2>*/ , pyrUp_gpu<float3> , pyrUp_gpu<float4> }
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
|
||||
|
||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
_dst.create(src.rows * 2, src.cols * 2, src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
func(src, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// ImagePyramid
|
||||
|
||||
#ifdef HAVE_OPENCV_CUDALEGACY
|
||||
|
||||
namespace
|
||||
{
|
||||
class ImagePyramidImpl : public ImagePyramid
|
||||
{
|
||||
public:
|
||||
ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
|
||||
|
||||
void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
|
||||
|
||||
private:
|
||||
GpuMat layer0_;
|
||||
std::vector<GpuMat> pyramid_;
|
||||
int nLayers_;
|
||||
};
|
||||
|
||||
ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
|
||||
{
|
||||
GpuMat img = _img.getGpuMat();
|
||||
|
||||
CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );
|
||||
|
||||
img.copyTo(layer0_, stream);
|
||||
|
||||
Size szLastLayer = img.size();
|
||||
nLayers_ = 1;
|
||||
|
||||
if (numLayers <= 0)
|
||||
numLayers = 255; // effectively unlimited: the loop below stops once either dimension shrinks to 1
|
||||
|
||||
pyramid_.resize(numLayers);
|
||||
|
||||
for (int i = 0; i < numLayers - 1; ++i)
|
||||
{
|
||||
Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
|
||||
|
||||
if (szCurLayer.width == 0 || szCurLayer.height == 0)
|
||||
break;
|
||||
|
||||
ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
|
||||
nLayers_++;
|
||||
|
||||
const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
|
||||
|
||||
cv::cuda::device::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
|
||||
|
||||
szLastLayer = szCurLayer;
|
||||
}
|
||||
}
|
||||
|
||||
void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
|
||||
{
|
||||
CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );
|
||||
|
||||
ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
|
||||
GpuMat outImg = _outImg.getGpuMat();
|
||||
|
||||
if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
|
||||
{
|
||||
layer0_.copyTo(outImg, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
float lastScale = 1.0f;
|
||||
float curScale;
|
||||
GpuMat lastLayer = layer0_;
|
||||
GpuMat curLayer;
|
||||
|
||||
for (int i = 0; i < nLayers_ - 1; ++i)
|
||||
{
|
||||
curScale = lastScale * 0.5f;
|
||||
curLayer = pyramid_[i];
|
||||
|
||||
if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
|
||||
{
|
||||
curLayer.copyTo(outImg, stream);
|
||||
}
|
||||
|
||||
if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
|
||||
break;
|
||||
|
||||
lastScale = curScale;
|
||||
lastLayer = curLayer;
|
||||
}
|
||||
|
||||
cv::cuda::device::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
|
||||
{
|
||||
#ifndef HAVE_OPENCV_CUDALEGACY
|
||||
(void) img;
|
||||
(void) nLayers;
|
||||
(void) stream;
|
||||
throw_no_cuda();
|
||||
return Ptr<ImagePyramid>();
|
||||
#else
|
||||
return new ImagePyramidImpl(img, nLayers, stream);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // HAVE_CUDA
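A short usage sketch for the two pyramid entry points above (an assumed example, not part of the commit; pyramid_example is a made-up name). Output sizes follow the create() calls in pyrDown and pyrUp.

#include <opencv2/cudawarping.hpp>

void pyramid_example(const cv::cuda::GpuMat& img)
{
    cv::cuda::GpuMat half, doubled;
    cv::cuda::pyrDown(img, half);     // (rows + 1) / 2  x  (cols + 1) / 2
    cv::cuda::pyrUp(half, doubled);   // rows * 2  x  cols * 2 of the downsampled layer
}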
|
104
modules/cudawarping/src/remap.cpp
Normal file
@@ -0,0 +1,104 @@
// (standard OpenCV BSD license header omitted; identical across all files in this commit)
#include "precomp.hpp"
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::cuda::remap(InputArray, OutputArray, InputArray, InputArray, int, int, Scalar, Stream&){ throw_no_cuda(); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T>
|
||||
void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
|
||||
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
}
|
||||
}}}
|
||||
|
||||
void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
|
||||
{
|
||||
using namespace cv::cuda::device::imgproc;
|
||||
|
||||
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
{remap_gpu<uchar> , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3> , remap_gpu<uchar4> },
|
||||
{0 /*remap_gpu<schar>*/, 0 /*remap_gpu<char2>*/ , 0 /*remap_gpu<char3>*/, 0 /*remap_gpu<char4>*/},
|
||||
{remap_gpu<ushort> , 0 /*remap_gpu<ushort2>*/, remap_gpu<ushort3> , remap_gpu<ushort4> },
|
||||
{remap_gpu<short> , 0 /*remap_gpu<short2>*/ , remap_gpu<short3> , remap_gpu<short4> },
|
||||
{0 /*remap_gpu<int>*/ , 0 /*remap_gpu<int2>*/ , 0 /*remap_gpu<int3>*/ , 0 /*remap_gpu<int4>*/ },
|
||||
{remap_gpu<float> , 0 /*remap_gpu<float2>*/ , remap_gpu<float3> , remap_gpu<float4> }
|
||||
};
|
||||
|
||||
GpuMat src = _src.getGpuMat();
|
||||
GpuMat xmap = _xmap.getGpuMat();
|
||||
GpuMat ymap = _ymap.getGpuMat();
|
||||
|
||||
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
|
||||
CV_Assert( xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size() );
|
||||
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
|
||||
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
|
||||
|
||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||
if (!func)
|
||||
CV_Error(Error::StsUnsupportedFormat, "Unsupported input type");
|
||||
|
||||
_dst.create(xmap.size(), src.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
Scalar_<float> borderValueFloat;
|
||||
borderValueFloat = borderValue;
|
||||
|
||||
Size wholeSize;
|
||||
Point ofs;
|
||||
src.locateROI(wholeSize, ofs);
|
||||
|
||||
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
|
||||
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
|
||||
}
|
||||
|
||||
#endif // HAVE_CUDA
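A usage sketch for the remap wrapper above (an assumed example, not part of the commit; remap_example is a made-up name). Per the CV_Assert checks, both maps must be CV_32FC1 and of identical size, and the destination is created with the map size and the source type.

#include <opencv2/imgproc.hpp>
#include <opencv2/cudawarping.hpp>

void remap_example(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst,
                   const cv::cuda::GpuMat& xmap, const cv::cuda::GpuMat& ymap)
{
    CV_Assert(xmap.type() == CV_32FC1 && ymap.type() == CV_32FC1 && xmap.size() == ymap.size());
    cv::cuda::remap(src, dst, xmap, ymap, cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar());
}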
|
108
modules/cudawarping/src/resize.cpp
Normal file
@@ -0,0 +1,108 @@
// (standard OpenCV BSD license header omitted; identical across all files in this commit)
#include "precomp.hpp"

#if !defined HAVE_CUDA || defined(CUDA_DISABLER)

void cv::cuda::resize(InputArray, OutputArray, Size, double, double, int, Stream&) { throw_no_cuda(); }

#else // HAVE_CUDA

namespace cv { namespace cuda { namespace device
{
    template <typename T>
    void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
}}}

void cv::cuda::resize(InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
{
    GpuMat src = _src.getGpuMat();

    typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {device::resize<uchar>      , 0 /*device::resize<uchar2>*/ , device::resize<uchar3>     , device::resize<uchar4>     },
        {0 /*device::resize<schar>*/, 0 /*device::resize<char2>*/  , 0 /*device::resize<char3>*/, 0 /*device::resize<char4>*/},
        {device::resize<ushort>     , 0 /*device::resize<ushort2>*/, device::resize<ushort3>    , device::resize<ushort4>    },
        {device::resize<short>      , 0 /*device::resize<short2>*/ , device::resize<short3>     , device::resize<short4>     },
        {0 /*device::resize<int>*/  , 0 /*device::resize<int2>*/   , 0 /*device::resize<int3>*/ , 0 /*device::resize<int4>*/ },
        {device::resize<float>      , 0 /*device::resize<float2>*/ , device::resize<float3>     , device::resize<float4>     }
    };

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
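
    // Either an explicit dsize or positive scale factors must be supplied; derive whichever one is missing.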
    if (dsize == Size())
    {
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }

    _dst.create(dsize, src.type());
    GpuMat dst = _dst.getGpuMat();

    if (dsize == src.size())
    {
        src.copyTo(dst, stream);
        return;
    }

    const func_t func = funcs[src.depth()][src.channels() - 1];

    if (!func)
        CV_Error(Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);
    PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);
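
    // The device kernel maps destination pixels back to source coordinates, so it receives the inverse scale factors.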
    func(src, wholeSrc, ofs.y, ofs.x, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), interpolation, StreamAccessor::getStream(stream));
}

#endif // HAVE_CUDA
656
modules/cudawarping/src/warp.cpp
Normal file
@@ -0,0 +1,656 @@
/* OpenCV BSD license header (identical to the one at the top of this commit) */

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined HAVE_CUDA || defined(CUDA_DISABLER)

void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }

#else // HAVE_CUDA

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
        void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);

        template <typename T>
        void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
                            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);

        template <typename T>
        void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
                                 int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    }
}}}

void cv::cuda::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
{
    using namespace cv::cuda::device::imgproc;

    Mat M = _M.getMat();

    CV_Assert( M.rows == 2 && M.cols == 3 );

    _xmap.create(dsize, CV_32FC1);
    _ymap.create(dsize, CV_32FC1);

    GpuMat xmap = _xmap.getGpuMat();
    GpuMat ymap = _ymap.getGpuMat();

    float coeffs[2 * 3];
    Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);

    if (inverse)
        M.convertTo(coeffsMat, coeffsMat.type());
    else
    {
        cv::Mat iM;
        invertAffineTransform(M, iM);
        iM.convertTo(coeffsMat, coeffsMat.type());
    }

    buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}

void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
{
    using namespace cv::cuda::device::imgproc;

    Mat M = _M.getMat();

    CV_Assert( M.rows == 3 && M.cols == 3 );

    _xmap.create(dsize, CV_32FC1);
    _ymap.create(dsize, CV_32FC1);

    GpuMat xmap = _xmap.getGpuMat();
    GpuMat ymap = _ymap.getGpuMat();

    float coeffs[3 * 3];
    Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);

    if (inverse)
        M.convertTo(coeffsMat, coeffsMat.type());
    else
    {
        cv::Mat iM;
        invert(M, iM);
        iM.convertTo(coeffsMat, coeffsMat.type());
    }

    buildWarpPerspectiveMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}

namespace
{
    template <int DEPTH> struct NppWarpFunc
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
                                    int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                    int interpolation);
    };

    template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
    {
        typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;

        static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
        {
            static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

            NppiSize srcsz;
            srcsz.height = src.rows;
            srcsz.width = src.cols;

            NppiRect srcroi;
            srcroi.x = 0;
            srcroi.y = 0;
            srcroi.height = src.rows;
            srcroi.width = src.cols;

            NppiRect dstroi;
            dstroi.x = 0;
            dstroi.y = 0;
            dstroi.height = dst.rows;
            dstroi.width = dst.cols;

            cv::cuda::NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
                              dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
                              coeffs, npp_inter[interpolation]) );
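
            // For the default (null) stream, block until the asynchronous NPP call has finished.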
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}

void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
{
    GpuMat src = _src.getGpuMat();
    Mat M = _M.getMat();

    CV_Assert( M.rows == 2 && M.cols == 3 );

    const int interpolation = flags & INTER_MAX;

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );

    _dst.create(dsize, src.type());
    GpuMat dst = _dst.getGpuMat();

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);
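
    // NPP support table indexed by [depth][channels - 1][interpolation (NN, linear, cubic)].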
    static const bool useNppTab[6][4][3] =
    {
        {
            {false, false, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        }
    };

    bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
    // NPP bug on float data
    useNpp = useNpp && src.depth() != CV_32F;

    if (useNpp)
    {
        typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);

        static const func_t funcs[2][6][4] =
        {
            {
                {NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
            },
            {
                {NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
            }
        };
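
        // The NPP path is only taken for BORDER_CONSTANT; pre-fill the destination so unmapped pixels keep the border value.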
        dst.setTo(borderValue, stream);

        double coeffs[2][3];
        Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
        M.convertTo(coeffsMat, coeffsMat.type());

        const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
    }
    else
    {
        using namespace cv::cuda::device::imgproc;

        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        static const func_t funcs[6][4] =
        {
            {warpAffine_gpu<uchar>      , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3>     , warpAffine_gpu<uchar4>     },
            {0 /*warpAffine_gpu<schar>*/, 0 /*warpAffine_gpu<char2>*/  , 0 /*warpAffine_gpu<char3>*/, 0 /*warpAffine_gpu<char4>*/},
            {warpAffine_gpu<ushort>     , 0 /*warpAffine_gpu<ushort2>*/, warpAffine_gpu<ushort3>    , warpAffine_gpu<ushort4>    },
            {warpAffine_gpu<short>      , 0 /*warpAffine_gpu<short2>*/ , warpAffine_gpu<short3>     , warpAffine_gpu<short4>     },
            {0 /*warpAffine_gpu<int>*/  , 0 /*warpAffine_gpu<int2>*/   , 0 /*warpAffine_gpu<int3>*/ , 0 /*warpAffine_gpu<int4>*/ },
            {warpAffine_gpu<float>      , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3>     , warpAffine_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        float coeffs[2 * 3];
        Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
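
        // The device kernel expects the backward (inverse) mapping, so invert M unless the caller already supplied it.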
        if (flags & WARP_INVERSE_MAP)
            M.convertTo(coeffsMat, coeffsMat.type());
        else
        {
            cv::Mat iM;
            invertAffineTransform(M, iM);
            iM.convertTo(coeffsMat, coeffsMat.type());
        }

        Scalar_<float> borderValueFloat;
        borderValueFloat = borderValue;

        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
    }
}

void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
{
    GpuMat src = _src.getGpuMat();
    Mat M = _M.getMat();

    CV_Assert( M.rows == 3 && M.cols == 3 );

    const int interpolation = flags & INTER_MAX;

    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );

    _dst.create(dsize, src.type());
    GpuMat dst = _dst.getGpuMat();

    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);

    static const bool useNppTab[6][4][3] =
    {
        {
            {false, false, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, false}
        },
        {
            {false, false, false},
            {false, false, false},
            {false, false, false},
            {false, false, false}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        },
        {
            {false, true, true},
            {false, false, false},
            {false, true, true},
            {false, false, true}
        }
    };

    bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
    // NPP bug on float data
    useNpp = useNpp && src.depth() != CV_32F;

    if (useNpp)
    {
        typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);

        static const func_t funcs[2][6][4] =
        {
            {
                {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
            },
            {
                {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
                {0, 0, 0, 0},
                {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
                {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
            }
        };

        dst.setTo(borderValue, stream);

        double coeffs[3][3];
        Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
        M.convertTo(coeffsMat, coeffsMat.type());

        const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
    }
    else
    {
        using namespace cv::cuda::device::imgproc;
        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
            int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

        static const func_t funcs[6][4] =
        {
            {warpPerspective_gpu<uchar>      , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3>     , warpPerspective_gpu<uchar4>     },
            {0 /*warpPerspective_gpu<schar>*/, 0 /*warpPerspective_gpu<char2>*/  , 0 /*warpPerspective_gpu<char3>*/, 0 /*warpPerspective_gpu<char4>*/},
            {warpPerspective_gpu<ushort>     , 0 /*warpPerspective_gpu<ushort2>*/, warpPerspective_gpu<ushort3>    , warpPerspective_gpu<ushort4>    },
            {warpPerspective_gpu<short>      , 0 /*warpPerspective_gpu<short2>*/ , warpPerspective_gpu<short3>     , warpPerspective_gpu<short4>     },
            {0 /*warpPerspective_gpu<int>*/  , 0 /*warpPerspective_gpu<int2>*/   , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
            {warpPerspective_gpu<float>      , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3>     , warpPerspective_gpu<float4>     }
        };

        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);

        float coeffs[3 * 3];
        Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
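
        // As with warpAffine, the kernel needs the backward mapping; invert M unless WARP_INVERSE_MAP is set.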
        if (flags & WARP_INVERSE_MAP)
            M.convertTo(coeffsMat, coeffsMat.type());
        else
        {
            cv::Mat iM;
            invert(M, iM);
            iM.convertTo(coeffsMat, coeffsMat.type());
        }

        Scalar_<float> borderValueFloat;
        borderValueFloat = borderValue;

        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
            dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
    }
}

//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
                                cudaStream_t stream);
    }
}}}

void cv::cuda::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
                                  float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream)
{
    (void) src_size;

    Mat K = _K.getMat();
    Mat R = _R.getMat();
    Mat T = _T.getMat();

    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
    CV_Assert( (T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32FC1 && T.isContinuous() );
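
    // R is expected to be a rotation, so R.t() acts as R.inv(): the kernel receives K*R^-1 and R*K^-1 as contiguous 3x3 arrays.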
    Mat K_Rinv = K * R.t();
    Mat R_Kinv = R * K.inv();
    CV_Assert( K_Rinv.isContinuous() );
    CV_Assert( R_Kinv.isContinuous() );

    _map_x.create(dst_roi.size(), CV_32FC1);
    _map_y.create(dst_roi.size(), CV_32FC1);

    GpuMat map_x = _map_x.getGpuMat();
    GpuMat map_y = _map_y.getGpuMat();

    device::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
                                        T.ptr<float>(), scale, StreamAccessor::getStream(stream));
}

//////////////////////////////////////////////////////////////////////////////
// buildWarpCylindricalMaps
namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                      const float k_rinv[9], const float r_kinv[9], float scale,
                                      cudaStream_t stream);
    }
}}}

void cv::cuda::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
                                        OutputArray _map_x, OutputArray _map_y, Stream& stream)
{
    (void) src_size;

    Mat K = _K.getMat();
    Mat R = _R.getMat();

    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );

    Mat K_Rinv = K * R.t();
    Mat R_Kinv = R * K.inv();
    CV_Assert( K_Rinv.isContinuous() );
    CV_Assert( R_Kinv.isContinuous() );

    _map_x.create(dst_roi.size(), CV_32FC1);
    _map_y.create(dst_roi.size(), CV_32FC1);

    GpuMat map_x = _map_x.getGpuMat();
    GpuMat map_y = _map_y.getGpuMat();

    device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
}


//////////////////////////////////////////////////////////////////////////////
// buildWarpSphericalMaps

namespace cv { namespace cuda { namespace device
{
    namespace imgproc
    {
        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                    const float k_rinv[9], const float r_kinv[9], float scale,
                                    cudaStream_t stream);
    }
}}}

void cv::cuda::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
                                      OutputArray _map_x, OutputArray _map_y, Stream& stream)
{
    (void) src_size;

    Mat K = _K.getMat();
    Mat R = _R.getMat();

    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );

    Mat K_Rinv = K * R.t();
    Mat R_Kinv = R * K.inv();
    CV_Assert( K_Rinv.isContinuous() );
    CV_Assert( R_Kinv.isContinuous() );

    _map_x.create(dst_roi.size(), CV_32FC1);
    _map_y.create(dst_roi.size(), CV_32FC1);

    GpuMat map_x = _map_x.getGpuMat();
    GpuMat map_y = _map_y.getGpuMat();

    device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
}

////////////////////////////////////////////////////////////////////////
// rotate

namespace
{
    template <int DEPTH> struct NppRotateFunc
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
                                    npp_type* pDst, int nDstStep, NppiRect oDstROI,
                                    double nAngle, double nShiftX, double nShiftY, int eInterpolation);
    };

    template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
    {
        typedef typename NppRotateFunc<DEPTH>::npp_type npp_type;

        static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
        {
            (void)dsize;
            static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

            NppStreamHandler h(stream);

            NppiSize srcsz;
            srcsz.height = src.rows;
            srcsz.width = src.cols;
            NppiRect srcroi;
            srcroi.x = srcroi.y = 0;
            srcroi.height = src.rows;
            srcroi.width = src.cols;
            NppiRect dstroi;
            dstroi.x = dstroi.y = 0;
            dstroi.height = dst.rows;
            dstroi.width = dst.cols;

            nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
                dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}

void cv::cuda::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
    static const func_t funcs[6][4] =
    {
        {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
        {0,0,0,0},
        {NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
        {0,0,0,0},
        {0,0,0,0},
        {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );

    _dst.create(dsize, src.type());
    GpuMat dst = _dst.getGpuMat();
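
    // Pre-fill the destination with zeros; pixels not covered by the rotated source are left black.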
    dst.setTo(Scalar::all(0), stream);

    funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
}

#endif // HAVE_CUDA