renamed gpuwarping -> cudawarping

This commit is contained in:
Vladislav Vinogradov
2013-07-23 15:57:59 +04:00
parent dcd600cc47
commit d4901a99ea
52 changed files with 45 additions and 45 deletions

View File

@@ -0,0 +1,221 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
// TODO use intrinsics like __sinf and so on
namespace build_warp_maps
{
__constant__ float ck_rinv[9];
__constant__ float cr_kinv[9];
__constant__ float ct[3];
__constant__ float cscale;
}
class PlaneMapper
{
public:
static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
{
using namespace build_warp_maps;
float x_ = u / cscale - ct[0];
float y_ = v / cscale - ct[1];
float z;
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);
x /= z;
y /= z;
}
};
class CylindricalMapper
{
public:
static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
{
using namespace build_warp_maps;
u /= cscale;
float x_ = ::sinf(u);
float y_ = v / cscale;
float z_ = ::cosf(u);
float z;
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
if (z > 0) { x /= z; y /= z; }
else x = y = -1;
}
};
class SphericalMapper
{
public:
static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
{
using namespace build_warp_maps;
v /= cscale;
u /= cscale;
float sinv = ::sinf(v);
float x_ = sinv * ::sinf(u);
float y_ = -::cosf(v);
float z_ = sinv * ::cosf(u);
float z;
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
if (z > 0) { x /= z; y /= z; }
else x = y = -1;
}
};
template <typename Mapper>
__global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
PtrStepf map_x, PtrStepf map_y)
{
int du = blockIdx.x * blockDim.x + threadIdx.x;
int dv = blockIdx.y * blockDim.y + threadIdx.y;
if (du < cols && dv < rows)
{
float u = tl_u + du;
float v = tl_v + dv;
float x, y;
Mapper::mapBackward(u, v, x, y);
map_x.ptr(dv)[du] = x;
map_y.ptr(dv)[du] = y;
}
}
void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
const float k_rinv[9], const float r_kinv[9], const float t[3],
float scale, cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
int cols = map_x.cols;
int rows = map_x.rows;
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
buildWarpMapsKernel<PlaneMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
const float k_rinv[9], const float r_kinv[9], float scale,
cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
int cols = map_x.cols;
int rows = map_x.rows;
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
buildWarpMapsKernel<CylindricalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
const float k_rinv[9], const float r_kinv[9], float scale,
cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
int cols = map_x.cols;
int rows = map_x.rows;
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
buildWarpMapsKernel<SphericalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev {
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,228 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t;
__shared__ work_t smem[256 + 4];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y;
const int src_y = 2 * y;
if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2)
{
{
work_t sum;
sum = 0.0625f * src(src_y - 2, x);
sum = sum + 0.25f * src(src_y - 1, x);
sum = sum + 0.375f * src(src_y , x);
sum = sum + 0.25f * src(src_y + 1, x);
sum = sum + 0.0625f * src(src_y + 2, x);
smem[2 + threadIdx.x] = sum;
}
if (threadIdx.x < 2)
{
const int left_x = x - 2;
work_t sum;
sum = 0.0625f * src(src_y - 2, left_x);
sum = sum + 0.25f * src(src_y - 1, left_x);
sum = sum + 0.375f * src(src_y , left_x);
sum = sum + 0.25f * src(src_y + 1, left_x);
sum = sum + 0.0625f * src(src_y + 2, left_x);
smem[threadIdx.x] = sum;
}
if (threadIdx.x > 253)
{
const int right_x = x + 2;
work_t sum;
sum = 0.0625f * src(src_y - 2, right_x);
sum = sum + 0.25f * src(src_y - 1, right_x);
sum = sum + 0.375f * src(src_y , right_x);
sum = sum + 0.25f * src(src_y + 1, right_x);
sum = sum + 0.0625f * src(src_y + 2, right_x);
smem[4 + threadIdx.x] = sum;
}
}
else
{
{
work_t sum;
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
sum = sum + 0.375f * src(src_y , b.idx_col_high(x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));
smem[2 + threadIdx.x] = sum;
}
if (threadIdx.x < 2)
{
const int left_x = x - 2;
work_t sum;
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
sum = sum + 0.375f * src(src_y , b.idx_col(left_x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));
smem[threadIdx.x] = sum;
}
if (threadIdx.x > 253)
{
const int right_x = x + 2;
work_t sum;
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
sum = sum + 0.375f * src(src_y , b.idx_col_high(right_x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));
smem[4 + threadIdx.x] = sum;
}
}
__syncthreads();
if (threadIdx.x < 128)
{
const int tid2 = threadIdx.x * 2;
work_t sum;
sum = 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
if (dst_x < dst_cols)
dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
}
}
template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows);
B<T> b(src.rows, src.cols);
pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
{
pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
}
template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,196 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
__shared__ sum_t s_srcPatch[10][10];
__shared__ sum_t s_dstPatch[20][16];
if (threadIdx.x < 10 && threadIdx.y < 10)
{
int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
srcx = ::abs(srcx);
srcx = ::min(src.cols - 1, srcx);
srcy = ::abs(srcy);
srcy = ::min(src.rows - 1, srcy);
s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
}
__syncthreads();
sum_t sum = VecTraits<sum_t>::all(0);
const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0);
const bool eveny = ((threadIdx.y & 1) == 0);
const int tidx = threadIdx.x;
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;
if (threadIdx.y < 2)
{
sum = VecTraits<sum_t>::all(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[threadIdx.y][threadIdx.x] = sum;
}
if (threadIdx.y > 13)
{
sum = VecTraits<sum_t>::all(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
}
__syncthreads();
sum = VecTraits<sum_t>::all(0);
const int tidy = threadIdx.y;
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];
if (x < dst.cols && y < dst.rows)
dst(y, x) = saturate_cast<T>(4.0f * sum);
}
template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
pyrUp<<<grid, block, 0, stream>>>(src, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
{
pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
}
template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,274 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float xcoo = mapx.ptr(y)[x];
const float ycoo = mapy.ptr(y)[x];
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
}
}
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
{
(void)srcWhole;
(void)xoff;
(void)yoff;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_remap_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff, yoff; \
tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
} \
}; \
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
PtrStepSz< type > dst, const float* borderValue, bool cc20) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(32, cc20 ? 8 : 4); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_remap_ ## type , srcWhole); \
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
PtrStepSz< type > dst, const float*, bool) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_remap_ ## type , srcWhole); \
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
} \
else \
{ \
BrdReplicate<type> brd(src.rows, src.cols); \
BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
{
if (stream == 0)
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
else
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
}
};
template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
static const caller_t callers[3][5] =
{
{
RemapDispatcher<PointFilter, BrdConstant, T>::call,
RemapDispatcher<PointFilter, BrdReplicate, T>::call,
RemapDispatcher<PointFilter, BrdReflect, T>::call,
RemapDispatcher<PointFilter, BrdWrap, T>::call,
RemapDispatcher<PointFilter, BrdReflect101, T>::call
},
{
RemapDispatcher<LinearFilter, BrdConstant, T>::call,
RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
RemapDispatcher<LinearFilter, BrdReflect, T>::call,
RemapDispatcher<LinearFilter, BrdWrap, T>::call,
RemapDispatcher<LinearFilter, BrdReflect101, T>::call
},
{
RemapDispatcher<CubicFilter, BrdConstant, T>::call,
RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
RemapDispatcher<CubicFilter, BrdReflect, T>::call,
RemapDispatcher<CubicFilter, BrdWrap, T>::call,
RemapDispatcher<CubicFilter, BrdReflect101, T>::call
}
};
callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
}
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,482 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <cfloat>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"
namespace cv { namespace cuda { namespace device
{
// kernels
template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
}
}
template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(src_x);
const int y1 = __float2int_rd(src_y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
const int x2_read = ::min(x2, src.cols - 1);
const int y2_read = ::min(y2, src.rows - 1);
T src_reg = src(y1, x1);
out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
src_reg = src(y1, x2_read);
out = out + src_reg * ((src_x - x1) * (y2 - src_y));
src_reg = src(y2_read, x1);
out = out + src_reg * ((x2 - src_x) * (src_y - y1));
src_reg = src(y2_read, x2_read);
out = out + src_reg * ((src_x - x1) * (src_y - y1));
dst(dst_y, dst_x) = saturate_cast<T>(out);
}
}
template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
dst(dst_y, dst_x) = src(src_y, src_x);
}
}
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
dst(y, x) = src(y, x);
}
}
// textures
template <typename T> struct TextureAccessor;
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
texture<type, cudaTextureType2D, cudaReadModeElementType> tex_resize_##type (0, cudaFilterModePoint, cudaAddressModeClamp); \
template <> struct TextureAccessor<type> \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff; \
int yoff; \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_resize_##type, x + xoff, y + yoff); \
} \
__host__ static void bind(const PtrStepSz<type>& mat) \
{ \
bindTexture(&tex_resize_##type, mat); \
} \
};
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
template <typename T>
TextureAccessor<T> texAccessor(const PtrStepSz<T>& mat, int yoff, int xoff)
{
TextureAccessor<T>::bind(mat);
TextureAccessor<T> t;
t.xoff = xoff;
t.yoff = yoff;
return t;
}
// callers for nearest interpolation
template <typename T>
void call_resize_nearest_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize_nearest<<<grid, block, 0, stream>>>(src, dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T>
void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize<<<grid, block>>>(texAccessor(srcWhole, yoff, xoff), dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
// callers for linear interpolation
template <typename T>
void call_resize_linear_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize_linear<<<grid, block>>>(src, dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T>
void call_resize_linear_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (srcWhole.data == src.data)
{
TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
LinearFilter< TextureAccessor<T> > filteredSrc(texSrc);
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
else
{
TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
// callers for cubic interpolation
template <typename T>
void call_resize_cubic_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
CubicFilter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
resize<<<grid, block, 0, stream>>>(filteredSrc, dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T>
void call_resize_cubic_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (srcWhole.data == src.data)
{
TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
CubicFilter< TextureAccessor<T> > filteredSrc(texSrc);
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
else
{
TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
CubicFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
// ResizeNearestDispatcher
template <typename T> struct ResizeNearestDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_nearest_glob(src, dst, fy, fx, stream);
}
};
template <typename T> struct SelectImplForNearest
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
if (stream)
call_resize_nearest_glob(src, dst, fy, fx, stream);
else
{
if (fx > 1 || fy > 1)
call_resize_nearest_glob(src, dst, fy, fx, 0);
else
call_resize_nearest_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
}
}
};
template <> struct ResizeNearestDispatcher<uchar> : SelectImplForNearest<uchar> {};
template <> struct ResizeNearestDispatcher<uchar4> : SelectImplForNearest<uchar4> {};
template <> struct ResizeNearestDispatcher<ushort> : SelectImplForNearest<ushort> {};
template <> struct ResizeNearestDispatcher<ushort4> : SelectImplForNearest<ushort4> {};
template <> struct ResizeNearestDispatcher<short> : SelectImplForNearest<short> {};
template <> struct ResizeNearestDispatcher<short4> : SelectImplForNearest<short4> {};
template <> struct ResizeNearestDispatcher<float> : SelectImplForNearest<float> {};
template <> struct ResizeNearestDispatcher<float4> : SelectImplForNearest<float4> {};
// ResizeLinearDispatcher
template <typename T> struct ResizeLinearDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_linear_glob(src, dst, fy, fx, stream);
}
};
template <typename T> struct SelectImplForLinear
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
if (stream)
call_resize_linear_glob(src, dst, fy, fx, stream);
else
{
if (fx > 1 || fy > 1)
call_resize_linear_glob(src, dst, fy, fx, 0);
else
call_resize_linear_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
}
}
};
template <> struct ResizeLinearDispatcher<uchar> : SelectImplForLinear<uchar> {};
template <> struct ResizeLinearDispatcher<uchar4> : SelectImplForLinear<uchar4> {};
template <> struct ResizeLinearDispatcher<ushort> : SelectImplForLinear<ushort> {};
template <> struct ResizeLinearDispatcher<ushort4> : SelectImplForLinear<ushort4> {};
template <> struct ResizeLinearDispatcher<short> : SelectImplForLinear<short> {};
template <> struct ResizeLinearDispatcher<short4> : SelectImplForLinear<short4> {};
template <> struct ResizeLinearDispatcher<float> : SelectImplForLinear<float> {};
template <> struct ResizeLinearDispatcher<float4> : SelectImplForLinear<float4> {};
// ResizeCubicDispatcher
template <typename T> struct ResizeCubicDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_cubic_glob(src, dst, fy, fx, stream);
}
};
template <typename T> struct SelectImplForCubic
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
if (stream)
call_resize_cubic_glob(src, dst, fy, fx, stream);
else
call_resize_cubic_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
}
};
template <> struct ResizeCubicDispatcher<uchar> : SelectImplForCubic<uchar> {};
template <> struct ResizeCubicDispatcher<uchar4> : SelectImplForCubic<uchar4> {};
template <> struct ResizeCubicDispatcher<ushort> : SelectImplForCubic<ushort> {};
template <> struct ResizeCubicDispatcher<ushort4> : SelectImplForCubic<ushort4> {};
template <> struct ResizeCubicDispatcher<short> : SelectImplForCubic<short> {};
template <> struct ResizeCubicDispatcher<short4> : SelectImplForCubic<short4> {};
template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {};
template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {};
// ResizeAreaDispatcher
template <typename T> struct ResizeAreaDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>&, int, int, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const int iscale_x = (int) round(fx);
const int iscale_y = (int) round(fy);
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
{
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
else
{
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
// resize
template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
{
typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);
static const func_t funcs[4] =
{
ResizeNearestDispatcher<T>::call,
ResizeLinearDispatcher<T>::call,
ResizeCubicDispatcher<T>::call,
ResizeAreaDispatcher<T>::call
};
// change to linear if area interpolation upscaling
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
}
template void resize<uchar >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<uchar3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<uchar4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
}}}
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,389 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/filters.hpp"
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
__constant__ float c_warpMat[3 * 3];
struct AffineTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
{
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
return make_float2(xcoo, ycoo);
}
};
struct PerspectiveTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
{
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
return make_float2(xcoo, ycoo);
}
};
///////////////////////////////////////////////////////////////////
// Build Maps
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < xmap.cols && y < xmap.rows)
{
const float2 coord = Transform::calcCoord(x, y);
xmap(y, x) = coord.x;
ymap(y, x) = coord.y;
}
}
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
}
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
}
///////////////////////////////////////////////////////////////////
// Warp
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float2 coord = Transform::calcCoord(x, y);
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
}
}
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, bool)
{
(void)xoff;
(void)yoff;
(void)srcWhole;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block>>>(filter_src, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_warp_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff, yoff; \
tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
} \
}; \
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, bool cc20) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(32, cc20 ? 8 : 4); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_warp_ ## type , srcWhole); \
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, bool) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_warp_ ## type , srcWhole); \
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
else \
{ \
BrdReplicate<type> brd(src.rows, src.cols); \
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
{
if (stream == 0)
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc20);
else
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc20);
}
};
template <class Transform, typename T>
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
static const func_t funcs[3][5] =
{
{
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call
},
{
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call
},
{
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call
}
};
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
}
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
}
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
}
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"

View File

@@ -0,0 +1,57 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include "opencv2/cudawarping.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDALEGACY
# include "opencv2/cudalegacy.hpp"
# include "opencv2/cudalegacy/private.hpp"
#endif
#endif /* __OPENCV_PRECOMP_H__ */

View File

@@ -0,0 +1,244 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
#else // HAVE_CUDA
//////////////////////////////////////////////////////////////////////////////
// pyrDown
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
}
}}}
void cv::cuda::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{pyrDown_gpu<uchar> , 0 /*pyrDown_gpu<uchar2>*/ , pyrDown_gpu<uchar3> , pyrDown_gpu<uchar4> },
{0 /*pyrDown_gpu<schar>*/, 0 /*pyrDown_gpu<schar2>*/ , 0 /*pyrDown_gpu<schar3>*/, 0 /*pyrDown_gpu<schar4>*/},
{pyrDown_gpu<ushort> , 0 /*pyrDown_gpu<ushort2>*/, pyrDown_gpu<ushort3> , pyrDown_gpu<ushort4> },
{pyrDown_gpu<short> , 0 /*pyrDown_gpu<short2>*/ , pyrDown_gpu<short3> , pyrDown_gpu<short4> },
{0 /*pyrDown_gpu<int>*/ , 0 /*pyrDown_gpu<int2>*/ , 0 /*pyrDown_gpu<int3>*/ , 0 /*pyrDown_gpu<int4>*/ },
{pyrDown_gpu<float> , 0 /*pyrDown_gpu<float2>*/ , pyrDown_gpu<float3> , pyrDown_gpu<float4> }
};
GpuMat src = _src.getGpuMat();
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert( func != 0 );
_dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
GpuMat dst = _dst.getGpuMat();
func(src, dst, StreamAccessor::getStream(stream));
}
//////////////////////////////////////////////////////////////////////////////
// pyrUp
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
}
}}}
void cv::cuda::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{pyrUp_gpu<uchar> , 0 /*pyrUp_gpu<uchar2>*/ , pyrUp_gpu<uchar3> , pyrUp_gpu<uchar4> },
{0 /*pyrUp_gpu<schar>*/, 0 /*pyrUp_gpu<schar2>*/ , 0 /*pyrUp_gpu<schar3>*/, 0 /*pyrUp_gpu<schar4>*/},
{pyrUp_gpu<ushort> , 0 /*pyrUp_gpu<ushort2>*/, pyrUp_gpu<ushort3> , pyrUp_gpu<ushort4> },
{pyrUp_gpu<short> , 0 /*pyrUp_gpu<short2>*/ , pyrUp_gpu<short3> , pyrUp_gpu<short4> },
{0 /*pyrUp_gpu<int>*/ , 0 /*pyrUp_gpu<int2>*/ , 0 /*pyrUp_gpu<int3>*/ , 0 /*pyrUp_gpu<int4>*/ },
{pyrUp_gpu<float> , 0 /*pyrUp_gpu<float2>*/ , pyrUp_gpu<float3> , pyrUp_gpu<float4> }
};
GpuMat src = _src.getGpuMat();
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert( func != 0 );
_dst.create(src.rows * 2, src.cols * 2, src.type());
GpuMat dst = _dst.getGpuMat();
func(src, dst, StreamAccessor::getStream(stream));
}
//////////////////////////////////////////////////////////////////////////////
// ImagePyramid
#ifdef HAVE_OPENCV_CUDALEGACY
namespace
{
class ImagePyramidImpl : public ImagePyramid
{
public:
ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
private:
GpuMat layer0_;
std::vector<GpuMat> pyramid_;
int nLayers_;
};
ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
{
GpuMat img = _img.getGpuMat();
CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );
img.copyTo(layer0_, stream);
Size szLastLayer = img.size();
nLayers_ = 1;
if (numLayers <= 0)
numLayers = 255; // it will cut-off when any of the dimensions goes 1
pyramid_.resize(numLayers);
for (int i = 0; i < numLayers - 1; ++i)
{
Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
if (szCurLayer.width == 0 || szCurLayer.height == 0)
break;
ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
nLayers_++;
const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
cv::cuda::device::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
szLastLayer = szCurLayer;
}
}
void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
{
CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );
ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
GpuMat outImg = _outImg.getGpuMat();
if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
{
layer0_.copyTo(outImg, stream);
return;
}
float lastScale = 1.0f;
float curScale;
GpuMat lastLayer = layer0_;
GpuMat curLayer;
for (int i = 0; i < nLayers_ - 1; ++i)
{
curScale = lastScale * 0.5f;
curLayer = pyramid_[i];
if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
{
curLayer.copyTo(outImg, stream);
}
if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
break;
lastScale = curScale;
lastLayer = curLayer;
}
cv::cuda::device::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
}
}
#endif
Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
{
#ifndef HAVE_OPENCV_CUDALEGACY
(void) img;
(void) numLayers;
(void) stream;
throw_no_cuda();
return Ptr<ImagePyramid>();
#else
return new ImagePyramidImpl(img, nLayers, stream);
#endif
}
#endif // HAVE_CUDA

View File

@@ -0,0 +1,104 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::remap(InputArray, OutputArray, InputArray, InputArray, int, int, Scalar, Stream&){ throw_no_cuda(); }
#else // HAVE_CUDA
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename T>
void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
}
}}}
void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
static const func_t funcs[6][4] =
{
{remap_gpu<uchar> , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3> , remap_gpu<uchar4> },
{0 /*remap_gpu<schar>*/, 0 /*remap_gpu<char2>*/ , 0 /*remap_gpu<char3>*/, 0 /*remap_gpu<char4>*/},
{remap_gpu<ushort> , 0 /*remap_gpu<ushort2>*/, remap_gpu<ushort3> , remap_gpu<ushort4> },
{remap_gpu<short> , 0 /*remap_gpu<short2>*/ , remap_gpu<short3> , remap_gpu<short4> },
{0 /*remap_gpu<int>*/ , 0 /*remap_gpu<int2>*/ , 0 /*remap_gpu<int3>*/ , 0 /*remap_gpu<int4>*/ },
{remap_gpu<float> , 0 /*remap_gpu<float2>*/ , remap_gpu<float3> , remap_gpu<float4> }
};
GpuMat src = _src.getGpuMat();
GpuMat xmap = _xmap.getGpuMat();
GpuMat ymap = _ymap.getGpuMat();
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size() );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
const func_t func = funcs[src.depth()][src.channels() - 1];
if (!func)
CV_Error(Error::StsUnsupportedFormat, "Unsupported input type");
_dst.create(xmap.size(), src.type());
GpuMat dst = _dst.getGpuMat();
Scalar_<float> borderValueFloat;
borderValueFloat = borderValue;
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
}
#endif // HAVE_CUDA

View File

@@ -0,0 +1,108 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::resize(InputArray, OutputArray, Size, double, double, int, Stream&) { throw_no_cuda(); }
#else // HAVE_CUDA
namespace cv { namespace cuda { namespace device
{
template <typename T>
void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
}}}
void cv::cuda::resize(InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
{
GpuMat src = _src.getGpuMat();
typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{device::resize<uchar> , 0 /*device::resize<uchar2>*/ , device::resize<uchar3> , device::resize<uchar4> },
{0 /*device::resize<schar>*/, 0 /*device::resize<char2>*/ , 0 /*device::resize<char3>*/, 0 /*device::resize<char4>*/},
{device::resize<ushort> , 0 /*device::resize<ushort2>*/, device::resize<ushort3> , device::resize<ushort4> },
{device::resize<short> , 0 /*device::resize<short2>*/ , device::resize<short3> , device::resize<short4> },
{0 /*device::resize<int>*/ , 0 /*device::resize<int2>*/ , 0 /*device::resize<int3>*/ , 0 /*device::resize<int4>*/ },
{device::resize<float> , 0 /*device::resize<float2>*/ , device::resize<float3> , device::resize<float4> }
};
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
if (dsize == Size())
{
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
}
else
{
fx = static_cast<double>(dsize.width) / src.cols;
fy = static_cast<double>(dsize.height) / src.rows;
}
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
if (dsize == src.size())
{
src.copyTo(dst, stream);
return;
}
const func_t func = funcs[src.depth()][src.channels() - 1];
if (!func)
CV_Error(Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);
func(src, wholeSrc, ofs.y, ofs.x, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), interpolation, StreamAccessor::getStream(stream));
}
#endif // HAVE_CUDA

View File

@@ -0,0 +1,656 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
#else // HAVE_CUDA
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
template <typename T>
void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
template <typename T>
void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
}
}}}
void cv::cuda::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
Mat M = _M.getMat();
CV_Assert( M.rows == 2 && M.cols == 3 );
_xmap.create(dsize, CV_32FC1);
_ymap.create(dsize, CV_32FC1);
GpuMat xmap = _xmap.getGpuMat();
GpuMat ymap = _ymap.getGpuMat();
float coeffs[2 * 3];
Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
else
{
cv::Mat iM;
invertAffineTransform(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
}
buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}
void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
Mat M = _M.getMat();
CV_Assert( M.rows == 3 && M.cols == 3 );
_xmap.create(dsize, CV_32FC1);
_ymap.create(dsize, CV_32FC1);
GpuMat xmap = _xmap.getGpuMat();
GpuMat ymap = _ymap.getGpuMat();
float coeffs[3 * 3];
Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
else
{
cv::Mat iM;
invert(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
}
buildWarpPerspectiveMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
}
namespace
{
template <int DEPTH> struct NppWarpFunc
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
int dstStep, NppiRect dstRoi, const double coeffs[][3],
int interpolation);
};
template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
{
typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;
static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
{
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiRect srcroi;
srcroi.x = 0;
srcroi.y = 0;
srcroi.height = src.rows;
srcroi.width = src.cols;
NppiRect dstroi;
dstroi.x = 0;
dstroi.y = 0;
dstroi.height = dst.rows;
dstroi.width = dst.cols;
cv::cuda::NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
coeffs, npp_inter[interpolation]) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
{
GpuMat src = _src.getGpuMat();
Mat M = _M.getMat();
CV_Assert( M.rows == 2 && M.cols == 3 );
const int interpolation = flags & INTER_MAX;
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
static const bool useNppTab[6][4][3] =
{
{
{false, false, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
}
};
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
if (useNpp)
{
typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
static const func_t funcs[2][6][4] =
{
{
{NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
},
{
{NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
}
};
dst.setTo(borderValue, stream);
double coeffs[2][3];
Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
M.convertTo(coeffsMat, coeffsMat.type());
const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
}
else
{
using namespace cv::cuda::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
static const func_t funcs[6][4] =
{
{warpAffine_gpu<uchar> , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3> , warpAffine_gpu<uchar4> },
{0 /*warpAffine_gpu<schar>*/, 0 /*warpAffine_gpu<char2>*/ , 0 /*warpAffine_gpu<char3>*/, 0 /*warpAffine_gpu<char4>*/},
{warpAffine_gpu<ushort> , 0 /*warpAffine_gpu<ushort2>*/, warpAffine_gpu<ushort3> , warpAffine_gpu<ushort4> },
{warpAffine_gpu<short> , 0 /*warpAffine_gpu<short2>*/ , warpAffine_gpu<short3> , warpAffine_gpu<short4> },
{0 /*warpAffine_gpu<int>*/ , 0 /*warpAffine_gpu<int2>*/ , 0 /*warpAffine_gpu<int3>*/ , 0 /*warpAffine_gpu<int4>*/ },
{warpAffine_gpu<float> , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3> , warpAffine_gpu<float4> }
};
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
float coeffs[2 * 3];
Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
if (flags & WARP_INVERSE_MAP)
M.convertTo(coeffsMat, coeffsMat.type());
else
{
cv::Mat iM;
invertAffineTransform(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
}
Scalar_<float> borderValueFloat;
borderValueFloat = borderValue;
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
}
}
void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
{
GpuMat src = _src.getGpuMat();
Mat M = _M.getMat();
CV_Assert( M.rows == 3 && M.cols == 3 );
const int interpolation = flags & INTER_MAX;
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP) ;
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
static const bool useNppTab[6][4][3] =
{
{
{false, false, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
}
};
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
if (useNpp)
{
typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
static const func_t funcs[2][6][4] =
{
{
{NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
},
{
{NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
}
};
dst.setTo(borderValue, stream);
double coeffs[3][3];
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
M.convertTo(coeffsMat, coeffsMat.type());
const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
}
else
{
using namespace cv::cuda::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
static const func_t funcs[6][4] =
{
{warpPerspective_gpu<uchar> , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3> , warpPerspective_gpu<uchar4> },
{0 /*warpPerspective_gpu<schar>*/, 0 /*warpPerspective_gpu<char2>*/ , 0 /*warpPerspective_gpu<char3>*/, 0 /*warpPerspective_gpu<char4>*/},
{warpPerspective_gpu<ushort> , 0 /*warpPerspective_gpu<ushort2>*/, warpPerspective_gpu<ushort3> , warpPerspective_gpu<ushort4> },
{warpPerspective_gpu<short> , 0 /*warpPerspective_gpu<short2>*/ , warpPerspective_gpu<short3> , warpPerspective_gpu<short4> },
{0 /*warpPerspective_gpu<int>*/ , 0 /*warpPerspective_gpu<int2>*/ , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
{warpPerspective_gpu<float> , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3> , warpPerspective_gpu<float4> }
};
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
float coeffs[3 * 3];
Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
if (flags & WARP_INVERSE_MAP)
M.convertTo(coeffsMat, coeffsMat.type());
else
{
cv::Mat iM;
invert(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
}
Scalar_<float> borderValueFloat;
borderValueFloat = borderValue;
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
}
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
cudaStream_t stream);
}
}}}
void cv::cuda::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream)
{
(void) src_size;
Mat K = _K.getMat();
Mat R = _R.getMat();
Mat T = _T.getMat();
CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
CV_Assert( (T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32FC1 && T.isContinuous() );
Mat K_Rinv = K * R.t();
Mat R_Kinv = R * K.inv();
CV_Assert( K_Rinv.isContinuous() );
CV_Assert( R_Kinv.isContinuous() );
_map_x.create(dst_roi.size(), CV_32FC1);
_map_y.create(dst_roi.size(), CV_32FC1);
GpuMat map_x = _map_x.getGpuMat();
GpuMat map_y = _map_y.getGpuMat();
device::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
T.ptr<float>(), scale, StreamAccessor::getStream(stream));
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpCylyndricalMaps
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
const float k_rinv[9], const float r_kinv[9], float scale,
cudaStream_t stream);
}
}}}
void cv::cuda::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
OutputArray _map_x, OutputArray _map_y, Stream& stream)
{
(void) src_size;
Mat K = _K.getMat();
Mat R = _R.getMat();
CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
Mat K_Rinv = K * R.t();
Mat R_Kinv = R * K.inv();
CV_Assert( K_Rinv.isContinuous() );
CV_Assert( R_Kinv.isContinuous() );
_map_x.create(dst_roi.size(), CV_32FC1);
_map_y.create(dst_roi.size(), CV_32FC1);
GpuMat map_x = _map_x.getGpuMat();
GpuMat map_y = _map_y.getGpuMat();
device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpSphericalMaps
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
const float k_rinv[9], const float r_kinv[9], float scale,
cudaStream_t stream);
}
}}}
void cv::cuda::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
OutputArray _map_x, OutputArray _map_y, Stream& stream)
{
(void) src_size;
Mat K = _K.getMat();
Mat R = _R.getMat();
CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
Mat K_Rinv = K * R.t();
Mat R_Kinv = R * K.inv();
CV_Assert( K_Rinv.isContinuous() );
CV_Assert( R_Kinv.isContinuous() );
_map_x.create(dst_roi.size(), CV_32FC1);
_map_y.create(dst_roi.size(), CV_32FC1);
GpuMat map_x = _map_x.getGpuMat();
GpuMat map_y = _map_y.getGpuMat();
device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
}
////////////////////////////////////////////////////////////////////////
// rotate
namespace
{
template <int DEPTH> struct NppRotateFunc
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
npp_type* pDst, int nDstStep, NppiRect oDstROI,
double nAngle, double nShiftX, double nShiftY, int eInterpolation);
};
template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
{
typedef typename NppRotateFunc<DEPTH>::npp_type npp_type;
static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
{
(void)dsize;
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
NppStreamHandler h(stream);
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiRect srcroi;
srcroi.x = srcroi.y = 0;
srcroi.height = src.rows;
srcroi.width = src.cols;
NppiRect dstroi;
dstroi.x = dstroi.y = 0;
dstroi.height = dst.rows;
dstroi.width = dst.cols;
nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::cuda::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
{0,0,0,0},
{NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
{0,0,0,0},
{0,0,0,0},
{NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
};
GpuMat src = _src.getGpuMat();
CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
dst.setTo(Scalar::all(0), stream);
funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
}
#endif // HAVE_CUDA