Added per-element min/max to the gpu module.

Fixed a compile error in transform.
This commit is contained in:
Vladislav Vinogradov
2010-12-06 08:10:11 +00:00
parent d96c5ebb7d
commit 17d9014373
5 changed files with 319 additions and 3 deletions

View File

@@ -43,6 +43,7 @@
#include "cuda_shared.hpp"
#include "transform.hpp"
#include "limits_gpu.hpp"
#include "saturate_cast.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
@@ -1295,4 +1296,127 @@ namespace cv { namespace gpu { namespace mathfunc
template void transpose_gpu<short2 >(const DevMem2D& src, const DevMem2D& dst);
template void transpose_gpu<int >(const DevMem2D& src, const DevMem2D& dst);
template void transpose_gpu<float >(const DevMem2D& src, const DevMem2D& dst);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// min/max
// Binary device functor that returns the smaller of its two arguments.
// min_gpu passes an instance of it to transform() as the per-element operation.
//
// The call operators are const-qualified: the functor holds no state, so it
// can be invoked through a const object or const reference as well.
struct MinOp
{
    // Generic case: forwards to whichever min() overload applies to T and
    // converts the result back to T.
    template <typename T>
    __device__ T operator()(T a, T b) const
    {
        return min(a, b);
    }

    // Floating-point arguments take the fmin() route instead of min().
    __device__ float operator()(float a, float b) const
    {
        return fmin(a, b);
    }

    __device__ double operator()(double a, double b) const
    {
        return fmin(a, b);
    }
};
// Binary device functor that returns the larger of its two arguments.
// max_gpu passes an instance of it to transform() as the per-element operation.
//
// The call operators are const-qualified: the functor holds no state, so it
// can be invoked through a const object or const reference as well.
struct MaxOp
{
    // Generic case: forwards to whichever max() overload applies to T and
    // converts the result back to T.
    template <typename T>
    __device__ T operator()(T a, T b) const
    {
        return max(a, b);
    }

    // Floating-point arguments take the fmax() route instead of max().
    __device__ float operator()(float a, float b) const
    {
        return fmax(a, b);
    }

    __device__ double operator()(double a, double b) const
    {
        return fmax(a, b);
    }
};
// Unary device functor computing min(a, s) against a fixed scalar s.
// The scalar is captured on the host at construction time and travels with
// the functor object when it is handed to transform().
struct ScalarMinOp
{
    // Scalar every element is compared against.
    double s;

    explicit ScalarMinOp(double s_) : s(s_) {}

    // The comparison is carried out in double, then the result is converted
    // back to the element type with saturate_cast<T>.
    // Const-qualified because the call only reads s.
    template <typename T>
    __device__ T operator()(T a) const
    {
        return saturate_cast<T>(fmin((double)a, s));
    }
};
// Unary device functor computing max(a, s) against a fixed scalar s.
// The scalar is captured on the host at construction time and travels with
// the functor object when it is handed to transform().
struct ScalarMaxOp
{
    // Scalar every element is compared against.
    double s;

    explicit ScalarMaxOp(double s_) : s(s_) {}

    // The comparison is carried out in double, then the result is converted
    // back to the element type with saturate_cast<T>.
    // Const-qualified because the call only reads s.
    template <typename T>
    __device__ T operator()(T a) const
    {
        return saturate_cast<T>(fmax((double)a, s));
    }
};
// Matrix/matrix minimum: builds a MinOp and forwards both sources, the
// destination and the stream to the binary transform() helper.
// NOTE(review): transform() is presumably what applies the functor to every
// element pair and handles stream synchronisation - confirm in transform.hpp.
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
    MinOp minimum;

    transform(src1, src2, dst, minimum, stream);
}
// Explicit instantiations of the matrix/matrix min_gpu overload, one per
// element type listed below, so the definitions are emitted in this
// translation unit.
// NOTE(review): the uchar line spells its parameters as DevMem2D, presumably
// an alias of DevMem2D_<uchar> - confirm against the DevMem2D declaration.
template void min_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
template void min_gpu<char >(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void min_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void min_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void min_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
// Matrix/matrix maximum: builds a MaxOp and forwards both sources, the
// destination and the stream to the binary transform() helper.
// NOTE(review): transform() is presumably what applies the functor to every
// element pair and handles stream synchronisation - confirm in transform.hpp.
template <typename T>
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
    MaxOp maximum;

    transform(src1, src2, dst, maximum, stream);
}
// Explicit instantiations of the matrix/matrix max_gpu overload, one per
// element type listed below, so the definitions are emitted in this
// translation unit.
// NOTE(review): the uchar line spells its parameters as DevMem2D, presumably
// an alias of DevMem2D_<uchar> - confirm against the DevMem2D declaration.
template void max_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
template void max_gpu<char >(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void max_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void max_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
// Matrix/scalar minimum: wraps the scalar src2 in a ScalarMinOp and forwards
// the source, the destination and the stream to the unary transform() helper.
// NOTE(review): transform() is presumably what applies the functor to every
// element and handles stream synchronisation - confirm in transform.hpp.
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
    ScalarMinOp clampAbove(src2);

    transform(src1, dst, clampAbove, stream);
}
// Explicit instantiations of the matrix/scalar min_gpu overload, one per
// element type listed below, so the definitions are emitted in this
// translation unit.
// NOTE(review): the uchar line spells its parameters as DevMem2D, presumably
// an alias of DevMem2D_<uchar> - confirm against the DevMem2D declaration.
template void min_gpu<uchar >(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
template void min_gpu<char >(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void min_gpu<short >(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void min_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void min_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
// Matrix/scalar maximum: wraps the scalar src2 in a ScalarMaxOp and forwards
// the source, the destination and the stream to the unary transform() helper.
// NOTE(review): transform() is presumably what applies the functor to every
// element and handles stream synchronisation - confirm in transform.hpp.
template <typename T>
void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
    ScalarMaxOp clampBelow(src2);

    transform(src1, dst, clampBelow, stream);
}
// Explicit instantiations of the matrix/scalar max_gpu overload, one per
// element type listed below, so the definitions are emitted in this
// translation unit.
// NOTE(review): the uchar line spells its parameters as DevMem2D, presumably
// an alias of DevMem2D_<uchar> - confirm against the DevMem2D declaration.
template void max_gpu<uchar >(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
template void max_gpu<char >(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void max_gpu<short >(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
}}}

View File

@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
//! Transform kernels
template <typename T, typename D, typename Mask, typename UnOp>
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -77,7 +77,7 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename T1, typename T2, typename D, typename Mask, typename BinOp>
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -105,7 +105,7 @@ namespace cv
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );