gpu : implement Bayer* -> Gray color conversion
This commit is contained in:
@@ -42,42 +42,37 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <opencv2/gpu/device/common.hpp>
|
||||
#include <opencv2/gpu/device/vec_traits.hpp>
|
||||
#include <opencv2/gpu/device/vec_math.hpp>
|
||||
#include <opencv2/gpu/device/limits.hpp>
|
||||
#include "opencv2/gpu/device/common.hpp"
|
||||
#include "opencv2/gpu/device/vec_traits.hpp"
|
||||
#include "opencv2/gpu/device/vec_math.hpp"
|
||||
#include "opencv2/gpu/device/limits.hpp"
|
||||
#include "opencv2/gpu/device/color.hpp"
|
||||
|
||||
namespace cv { namespace gpu {
|
||||
namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> struct Bayer2BGR;
|
||||
|
||||
template <> struct Bayer2BGR<uchar>
|
||||
{
|
||||
template <typename D>
|
||||
__global__ void Bayer2BGR_8u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
|
||||
uchar3 res0;
|
||||
uchar3 res1;
|
||||
uchar3 res2;
|
||||
uchar3 res3;
|
||||
|
||||
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
|
||||
{
|
||||
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (s_y >= dst.rows || (s_x << 2) >= dst.cols)
|
||||
return;
|
||||
|
||||
s_y = ::min(::max(s_y, 1), dst.rows - 2);
|
||||
|
||||
uchar4 patch[3][3];
|
||||
patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
|
||||
patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
|
||||
patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
|
||||
patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
|
||||
|
||||
patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
|
||||
patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
|
||||
patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
|
||||
patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
|
||||
|
||||
patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
|
||||
patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
|
||||
patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
|
||||
|
||||
D res0 = VecTraits<D>::all(numeric_limits<uchar>::max());
|
||||
D res1 = VecTraits<D>::all(numeric_limits<uchar>::max());
|
||||
D res2 = VecTraits<D>::all(numeric_limits<uchar>::max());
|
||||
D res3 = VecTraits<D>::all(numeric_limits<uchar>::max());
|
||||
patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
|
||||
|
||||
if ((s_y & 1) ^ start_with_green)
|
||||
{
|
||||
@@ -181,45 +176,69 @@ namespace cv { namespace gpu {
|
||||
res3.z = t7;
|
||||
}
|
||||
}
|
||||
|
||||
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
|
||||
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
dst(d_y, d_x) = res0;
|
||||
if (d_x + 1 < dst.cols)
|
||||
dst(d_y, d_x + 1) = res1;
|
||||
if (d_x + 2 < dst.cols)
|
||||
dst(d_y, d_x + 2) = res2;
|
||||
if (d_x + 3 < dst.cols)
|
||||
dst(d_y, d_x + 3) = res3;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename D>
|
||||
__global__ void Bayer2BGR_16u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
|
||||
template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
|
||||
template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
|
||||
{
|
||||
typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
|
||||
return f(pix);
|
||||
}
|
||||
template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
|
||||
{
|
||||
return pix;
|
||||
}
|
||||
template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
|
||||
{
|
||||
return make_uchar4(pix.x, pix.y, pix.z, 255);
|
||||
}
|
||||
|
||||
template <typename D>
|
||||
__global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
|
||||
{
|
||||
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (s_y >= src.rows || (s_x << 2) >= src.cols)
|
||||
return;
|
||||
|
||||
s_y = ::min(::max(s_y, 1), src.rows - 2);
|
||||
|
||||
Bayer2BGR<uchar> bayer;
|
||||
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
|
||||
|
||||
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
|
||||
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
dst(d_y, d_x) = toDst<D>(bayer.res0);
|
||||
if (d_x + 1 < src.cols)
|
||||
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
|
||||
if (d_x + 2 < src.cols)
|
||||
dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
|
||||
if (d_x + 3 < src.cols)
|
||||
dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
|
||||
}
|
||||
|
||||
template <> struct Bayer2BGR<ushort>
|
||||
{
|
||||
ushort3 res0;
|
||||
ushort3 res1;
|
||||
|
||||
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
|
||||
{
|
||||
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (s_y >= dst.rows || (s_x << 1) >= dst.cols)
|
||||
return;
|
||||
|
||||
s_y = ::min(::max(s_y, 1), dst.rows - 2);
|
||||
|
||||
ushort2 patch[3][3];
|
||||
patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
|
||||
patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
|
||||
patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
|
||||
patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
|
||||
|
||||
patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
|
||||
patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
|
||||
patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
|
||||
patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
|
||||
|
||||
patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
|
||||
patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
|
||||
patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
|
||||
|
||||
D res0 = VecTraits<D>::all(numeric_limits<ushort>::max());
|
||||
D res1 = VecTraits<D>::all(numeric_limits<ushort>::max());
|
||||
patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
|
||||
|
||||
if ((s_y & 1) ^ start_with_green)
|
||||
{
|
||||
@@ -279,53 +298,87 @@ namespace cv { namespace gpu {
|
||||
res1.z = t3;
|
||||
}
|
||||
}
|
||||
|
||||
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
|
||||
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
dst(d_y, d_x) = res0;
|
||||
if (d_x + 1 < dst.cols)
|
||||
dst(d_y, d_x + 1) = res1;
|
||||
}
|
||||
};
|
||||
|
||||
template <int cn>
|
||||
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
||||
{
|
||||
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(dst.cols, 4 * block.x), divUp(dst.rows, block.y));
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
|
||||
|
||||
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
template <int cn>
|
||||
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
||||
{
|
||||
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(dst.cols, 2 * block.x), divUp(dst.rows, block.y));
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
|
||||
|
||||
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
|
||||
template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
|
||||
{
|
||||
typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
|
||||
return f(pix);
|
||||
}
|
||||
template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
|
||||
{
|
||||
return pix;
|
||||
}
|
||||
template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
|
||||
{
|
||||
return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
|
||||
}
|
||||
}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
template <typename D>
|
||||
__global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
|
||||
{
|
||||
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (s_y >= src.rows || (s_x << 1) >= src.cols)
|
||||
return;
|
||||
|
||||
s_y = ::min(::max(s_y, 1), src.rows - 2);
|
||||
|
||||
Bayer2BGR<ushort> bayer;
|
||||
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
|
||||
|
||||
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
|
||||
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
dst(d_y, d_x) = toDst<D>(bayer.res0);
|
||||
if (d_x + 1 < src.cols)
|
||||
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
|
||||
}
|
||||
|
||||
template <int cn>
|
||||
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
||||
{
|
||||
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
|
||||
|
||||
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <int cn>
|
||||
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
||||
{
|
||||
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
|
||||
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
|
||||
|
||||
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
|
||||
template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
|
Reference in New Issue
Block a user