gpuwarping module for image warping
This commit is contained in:
@@ -399,172 +399,6 @@ namespace cv { namespace gpu { namespace cudev
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpMaps
|
||||
|
||||
// TODO use intrinsics like __sinf and so on
|
||||
|
||||
namespace build_warp_maps
|
||||
{
|
||||
|
||||
__constant__ float ck_rinv[9];
|
||||
__constant__ float cr_kinv[9];
|
||||
__constant__ float ct[3];
|
||||
__constant__ float cscale;
|
||||
}
|
||||
|
||||
|
||||
class PlaneMapper
|
||||
{
|
||||
public:
|
||||
static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
|
||||
{
|
||||
using namespace build_warp_maps;
|
||||
|
||||
float x_ = u / cscale - ct[0];
|
||||
float y_ = v / cscale - ct[1];
|
||||
|
||||
float z;
|
||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);
|
||||
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);
|
||||
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);
|
||||
|
||||
x /= z;
|
||||
y /= z;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class CylindricalMapper
|
||||
{
|
||||
public:
|
||||
static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
|
||||
{
|
||||
using namespace build_warp_maps;
|
||||
|
||||
u /= cscale;
|
||||
float x_ = ::sinf(u);
|
||||
float y_ = v / cscale;
|
||||
float z_ = ::cosf(u);
|
||||
|
||||
float z;
|
||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
|
||||
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
|
||||
|
||||
if (z > 0) { x /= z; y /= z; }
|
||||
else x = y = -1;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class SphericalMapper
|
||||
{
|
||||
public:
|
||||
static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)
|
||||
{
|
||||
using namespace build_warp_maps;
|
||||
|
||||
v /= cscale;
|
||||
u /= cscale;
|
||||
|
||||
float sinv = ::sinf(v);
|
||||
float x_ = sinv * ::sinf(u);
|
||||
float y_ = -::cosf(v);
|
||||
float z_ = sinv * ::cosf(u);
|
||||
|
||||
float z;
|
||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||
y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;
|
||||
z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;
|
||||
|
||||
if (z > 0) { x /= z; y /= z; }
|
||||
else x = y = -1;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename Mapper>
|
||||
__global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,
|
||||
PtrStepf map_x, PtrStepf map_y)
|
||||
{
|
||||
int du = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int dv = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
if (du < cols && dv < rows)
|
||||
{
|
||||
float u = tl_u + du;
|
||||
float v = tl_v + dv;
|
||||
float x, y;
|
||||
Mapper::mapBackward(u, v, x, y);
|
||||
map_x.ptr(dv)[du] = x;
|
||||
map_y.ptr(dv)[du] = y;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||
const float k_rinv[9], const float r_kinv[9], const float t[3],
|
||||
float scale, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
|
||||
|
||||
int cols = map_x.cols;
|
||||
int rows = map_x.rows;
|
||||
|
||||
dim3 threads(32, 8);
|
||||
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
||||
|
||||
buildWarpMapsKernel<PlaneMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
|
||||
|
||||
int cols = map_x.cols;
|
||||
int rows = map_x.rows;
|
||||
|
||||
dim3 threads(32, 8);
|
||||
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
||||
|
||||
buildWarpMapsKernel<CylindricalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
|
||||
|
||||
int cols = map_x.cols;
|
||||
int rows = map_x.rows;
|
||||
|
||||
dim3 threads(32, 8);
|
||||
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
||||
|
||||
buildWarpMapsKernel<SphericalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
|
||||
cudaSafeCall(cudaGetLastError());
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev {
|
||||
|
||||
|
||||
@@ -1,228 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t;
|
||||
|
||||
__shared__ work_t smem[256 + 4];
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y;
|
||||
|
||||
const int src_y = 2 * y;
|
||||
|
||||
if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2)
|
||||
{
|
||||
{
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * src(src_y - 2, x);
|
||||
sum = sum + 0.25f * src(src_y - 1, x);
|
||||
sum = sum + 0.375f * src(src_y , x);
|
||||
sum = sum + 0.25f * src(src_y + 1, x);
|
||||
sum = sum + 0.0625f * src(src_y + 2, x);
|
||||
|
||||
smem[2 + threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.x < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * src(src_y - 2, left_x);
|
||||
sum = sum + 0.25f * src(src_y - 1, left_x);
|
||||
sum = sum + 0.375f * src(src_y , left_x);
|
||||
sum = sum + 0.25f * src(src_y + 1, left_x);
|
||||
sum = sum + 0.0625f * src(src_y + 2, left_x);
|
||||
|
||||
smem[threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.x > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * src(src_y - 2, right_x);
|
||||
sum = sum + 0.25f * src(src_y - 1, right_x);
|
||||
sum = sum + 0.375f * src(src_y , right_x);
|
||||
sum = sum + 0.25f * src(src_y + 1, right_x);
|
||||
sum = sum + 0.0625f * src(src_y + 2, right_x);
|
||||
|
||||
smem[4 + threadIdx.x] = sum;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
|
||||
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
|
||||
sum = sum + 0.375f * src(src_y , b.idx_col_high(x));
|
||||
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
|
||||
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));
|
||||
|
||||
smem[2 + threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.x < 2)
|
||||
{
|
||||
const int left_x = x - 2;
|
||||
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
|
||||
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
|
||||
sum = sum + 0.375f * src(src_y , b.idx_col(left_x));
|
||||
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
|
||||
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));
|
||||
|
||||
smem[threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.x > 253)
|
||||
{
|
||||
const int right_x = x + 2;
|
||||
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
|
||||
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
|
||||
sum = sum + 0.375f * src(src_y , b.idx_col_high(right_x));
|
||||
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
|
||||
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));
|
||||
|
||||
smem[4 + threadIdx.x] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < 128)
|
||||
{
|
||||
const int tid2 = threadIdx.x * 2;
|
||||
|
||||
work_t sum;
|
||||
|
||||
sum = 0.0625f * smem[2 + tid2 - 2];
|
||||
sum = sum + 0.25f * smem[2 + tid2 - 1];
|
||||
sum = sum + 0.375f * smem[2 + tid2 ];
|
||||
sum = sum + 0.25f * smem[2 + tid2 + 1];
|
||||
sum = sum + 0.0625f * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
|
||||
|
||||
if (dst_x < dst_cols)
|
||||
dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(256);
|
||||
const dim3 grid(divUp(src.cols, block.x), dst.rows);
|
||||
|
||||
B<T> b(src.rows, src.cols);
|
||||
|
||||
pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
|
||||
}
|
||||
|
||||
template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
@@ -1,196 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
__shared__ sum_t s_srcPatch[10][10];
|
||||
__shared__ sum_t s_dstPatch[20][16];
|
||||
|
||||
if (threadIdx.x < 10 && threadIdx.y < 10)
|
||||
{
|
||||
int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
|
||||
int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
|
||||
|
||||
srcx = ::abs(srcx);
|
||||
srcx = ::min(src.cols - 1, srcx);
|
||||
|
||||
srcy = ::abs(srcy);
|
||||
srcy = ::min(src.rows - 1, srcy);
|
||||
|
||||
s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
sum_t sum = VecTraits<sum_t>::all(0);
|
||||
|
||||
const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
|
||||
const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0);
|
||||
const bool eveny = ((threadIdx.y & 1) == 0);
|
||||
const int tidx = threadIdx.x;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;
|
||||
|
||||
if (threadIdx.y < 2)
|
||||
{
|
||||
sum = VecTraits<sum_t>::all(0);
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[threadIdx.y][threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.y > 13)
|
||||
{
|
||||
sum = VecTraits<sum_t>::all(0);
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
sum = VecTraits<sum_t>::all(0);
|
||||
|
||||
const int tidy = threadIdx.y;
|
||||
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x];
|
||||
sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x];
|
||||
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x];
|
||||
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
dst(y, x) = saturate_cast<T>(4.0f * sum);
|
||||
}
|
||||
|
||||
template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
pyrUp<<<grid, block, 0, stream>>>(src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
|
||||
}
|
||||
|
||||
template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
@@ -1,274 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/filters.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
const float xcoo = mapx.ptr(y)[x];
|
||||
const float ycoo = mapy.ptr(y)[x];
|
||||
|
||||
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
|
||||
{
|
||||
(void)srcWhole;
|
||||
(void)xoff;
|
||||
(void)yoff;
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
|
||||
texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||
struct tex_remap_ ## type ## _reader \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
|
||||
PtrStepSz< type > dst, const float* borderValue, bool cc20) \
|
||||
{ \
|
||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||
dim3 block(32, cc20 ? 8 : 4); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_remap_ ## type , srcWhole); \
|
||||
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
|
||||
PtrStepSz< type > dst, const float*, bool) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_remap_ ## type , srcWhole); \
|
||||
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate<type> brd(src.rows, src.cols); \
|
||||
BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
|
||||
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
|
||||
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
|
||||
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
if (stream == 0)
|
||||
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
|
||||
else
|
||||
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
|
||||
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
|
||||
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
static const caller_t callers[3][5] =
|
||||
{
|
||||
{
|
||||
RemapDispatcher<PointFilter, BrdReflect101, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdReplicate, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdReflect, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdReflect, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdReflect, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdWrap, T>::call
|
||||
}
|
||||
};
|
||||
|
||||
callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
|
||||
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
@@ -1,302 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <cfloat>
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/filters.hpp"
|
||||
#include "opencv2/core/cuda/scan.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
const float xcoo = x * fx;
|
||||
const float ycoo = y * fy;
|
||||
|
||||
dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
dst(y, x) = saturate_cast<T>(src(y, x));
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
BrdReplicate<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
|
||||
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
|
||||
{
|
||||
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
BrdConstant<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
|
||||
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
|
||||
{
|
||||
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
BrdConstant<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
|
||||
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
|
||||
{
|
||||
(void)srcWhole;
|
||||
(void)xoff;
|
||||
(void)yoff;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
BrdReplicate<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
|
||||
|
||||
resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
|
||||
texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||
struct tex_resize_ ## type ## _reader \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
const int xoff; \
|
||||
const int yoff; \
|
||||
__host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_resize_ ## type, srcWhole); \
|
||||
tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
|
||||
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate< type > brd(src.rows, src.cols); \
|
||||
BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
|
||||
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
|
||||
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
|
||||
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
|
||||
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcher
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
if (stream == 0)
|
||||
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
|
||||
else
|
||||
ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct ResizeDispatcher<AreaFilter, T>
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
(void)srcWhole;
|
||||
(void)xoff;
|
||||
(void)yoff;
|
||||
int iscale_x = (int)round(fx);
|
||||
int iscale_y = (int)round(fy);
|
||||
|
||||
if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
|
||||
ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
|
||||
else
|
||||
ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
|
||||
PtrStepSzb dst, int interpolation, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[4] =
|
||||
{
|
||||
ResizeDispatcher<PointFilter, T>::call,
|
||||
ResizeDispatcher<LinearFilter, T>::call,
|
||||
ResizeDispatcher<CubicFilter, T>::call,
|
||||
ResizeDispatcher<AreaFilter, T>::call
|
||||
};
|
||||
// chenge to linear if area interpolation upscaling
|
||||
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
|
||||
interpolation = 1;
|
||||
|
||||
callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
|
||||
static_cast< PtrStepSz<T> >(dst), stream);
|
||||
}
|
||||
|
||||
template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
//template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
//template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template<typename T> struct scan_traits{};
|
||||
|
||||
template<> struct scan_traits<uchar>
|
||||
{
|
||||
typedef float scan_line_type;
|
||||
};
|
||||
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
@@ -1,389 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/border_interpolate.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/filters.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace imgproc
|
||||
{
|
||||
__constant__ float c_warpMat[3 * 3];
|
||||
|
||||
struct AffineTransform
|
||||
{
|
||||
static __device__ __forceinline__ float2 calcCoord(int x, int y)
|
||||
{
|
||||
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
|
||||
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
|
||||
|
||||
return make_float2(xcoo, ycoo);
|
||||
}
|
||||
};
|
||||
|
||||
struct PerspectiveTransform
|
||||
{
|
||||
static __device__ __forceinline__ float2 calcCoord(int x, int y)
|
||||
{
|
||||
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
|
||||
|
||||
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
|
||||
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
|
||||
|
||||
return make_float2(xcoo, ycoo);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Build Maps
|
||||
|
||||
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < xmap.cols && y < xmap.rows)
|
||||
{
|
||||
const float2 coord = Transform::calcCoord(x, y);
|
||||
|
||||
xmap(y, x) = coord.x;
|
||||
ymap(y, x) = coord.y;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
|
||||
|
||||
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||
|
||||
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
|
||||
}
|
||||
|
||||
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||
|
||||
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Warp
|
||||
|
||||
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
const float2 coord = Transform::calcCoord(x, y);
|
||||
|
||||
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
|
||||
}
|
||||
}
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, bool)
|
||||
{
|
||||
(void)xoff;
|
||||
(void)yoff;
|
||||
(void)srcWhole;
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
|
||||
texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||
struct tex_warp_ ## type ## _reader \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, bool cc20) \
|
||||
{ \
|
||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||
dim3 block(32, cc20 ? 8 : 4); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_warp_ ## type , srcWhole); \
|
||||
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
}; \
|
||||
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, bool) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_warp_ ## type , srcWhole); \
|
||||
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate<type> brd(src.rows, src.cols); \
|
||||
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
if (stream == 0)
|
||||
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc20);
|
||||
else
|
||||
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc20);
|
||||
}
|
||||
};
|
||||
|
||||
template <class Transform, typename T>
|
||||
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
static const func_t funcs[3][5] =
|
||||
{
|
||||
{
|
||||
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
|
||||
}
|
||||
};
|
||||
|
||||
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
|
||||
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||
|
||||
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||
|
||||
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
|
||||
}
|
||||
|
||||
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
|
||||
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
Reference in New Issue
Block a user