added gpu 1d window sum, convertTo, based on NPP.

added RGB <-> XYZ color conversion.
gpu morphology minor fix.
This commit is contained in:
Vladislav Vinogradov 2010-09-29 09:07:53 +00:00
parent 12b7f3a0d0
commit fb7aa43feb
9 changed files with 567 additions and 500 deletions

View File

@ -533,6 +533,9 @@ namespace cv
//! applies an advanced morphological operation to the image //! applies an advanced morphological operation to the image
CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations); CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations);
CV_EXPORTS void sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
CV_EXPORTS void sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
//////////////////////////////// Image Labeling //////////////////////////////// //////////////////////////////// Image Labeling ////////////////////////////////

View File

@ -188,7 +188,6 @@ namespace cv { namespace gpu { namespace improc
namespace imgproc namespace imgproc
{ {
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {}; template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN> template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
{ {
typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t; typedef typename TypeVec<uchar, DSTCN>::vec_t dst_t;
@ -239,7 +238,6 @@ namespace imgproc
} }
template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {}; template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {};
template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6> template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6>
{ {
static __device__ unsigned short cvt(const uchar* src_ptr, int bidx) static __device__ unsigned short cvt(const uchar* src_ptr, int bidx)
@ -343,7 +341,7 @@ namespace cv { namespace gpu { namespace improc
namespace imgproc namespace imgproc
{ {
template <int DSTCN, typename T> template <int DSTCN, typename T>
__global__ void Gray2RGB(const T* src_, size_t src_step, T* dst_, size_t dst_step, int rows, int cols) __global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{ {
typedef typename TypeVec<T, DSTCN>::vec_t dst_t; typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
@ -352,18 +350,17 @@ namespace imgproc
if (y < rows && x < cols) if (y < rows && x < cols)
{ {
T src = src_[y * src_step + x]; T src = *(const T*)(src_ + y * src_step + x * sizeof(T));
dst_t dst; dst_t dst;
dst.x = src; dst.x = src;
dst.y = src; dst.y = src;
dst.z = src; dst.z = src;
setAlpha(dst, ColorChannel<T>::max()); setAlpha(dst, ColorChannel<T>::max());
*(dst_t*)(dst_ + y * dst_step + x * DSTCN) = dst; *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
} }
} }
template <int GREEN_BITS> struct Gray2RGB5x5Converter {}; template <int GREEN_BITS> struct Gray2RGB5x5Converter {};
template<> struct Gray2RGB5x5Converter<6> template<> struct Gray2RGB5x5Converter<6>
{ {
static __device__ unsigned short cvt(unsigned int t) static __device__ unsigned short cvt(unsigned int t)
@ -398,7 +395,7 @@ namespace imgproc
namespace cv { namespace gpu { namespace improc namespace cv { namespace gpu { namespace improc
{ {
template <typename T, int DSTCN> template <typename T, int DSTCN>
void Gray2RGB_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream) void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
@ -406,14 +403,14 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
imgproc::Gray2RGB<DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(T), imgproc::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step / sizeof(T), src.rows, src.cols); dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream) void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)
{ {
typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<uchar, 3>, Gray2RGB_caller<uchar, 4>}; static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<uchar, 3>, Gray2RGB_caller<uchar, 4>};
@ -421,17 +418,17 @@ namespace cv { namespace gpu { namespace improc
Gray2RGB_callers[dstcn - 3](src, dst, stream); Gray2RGB_callers[dstcn - 3](src, dst, stream);
} }
void Gray2RGB_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int dstcn, cudaStream_t stream) void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)
{ {
typedef void (*Gray2RGB_caller_t)(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, cudaStream_t stream); typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<unsigned short, 3>, Gray2RGB_caller<unsigned short, 4>}; static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<unsigned short, 3>, Gray2RGB_caller<unsigned short, 4>};
Gray2RGB_callers[dstcn - 3](src, dst, stream); Gray2RGB_callers[dstcn - 3](src, dst, stream);
} }
void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream) void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream)
{ {
typedef void (*Gray2RGB_caller_t)(const DevMem2Df& src, const DevMem2Df& dst, cudaStream_t stream); typedef void (*Gray2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<float, 3>, Gray2RGB_caller<float, 4>}; static const Gray2RGB_caller_t Gray2RGB_callers[] = {Gray2RGB_caller<float, 3>, Gray2RGB_caller<float, 4>};
Gray2RGB_callers[dstcn - 3](src, dst, stream); Gray2RGB_callers[dstcn - 3](src, dst, stream);
@ -484,7 +481,6 @@ namespace imgproc
}; };
template <int GREEN_BITS> struct RGB5x52GrayConverter {}; template <int GREEN_BITS> struct RGB5x52GrayConverter {};
template<> struct RGB5x52GrayConverter<6> template<> struct RGB5x52GrayConverter<6>
{ {
static __device__ unsigned char cvt(unsigned int t) static __device__ unsigned char cvt(unsigned int t)
@ -514,200 +510,46 @@ namespace imgproc
} }
} }
__global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx) template <typename T> struct RGB2GrayConvertor
{ {
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2; static __device__ T cvt(const T* src, int bidx)
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{ {
const uchar* src = src_ + y * src_step + x * 3; return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);
uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
uchar4 dst;
dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src += 3;
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src += 3;
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src += 3;
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
*(uchar4*)(dst_ + y * dst_step + x) = dst;
} }
} };
template <> struct RGB2GrayConvertor<float>
__global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)
{ {
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1; static __device__ float cvt(const float* src, int bidx)
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{
const unsigned short* src = src_ + y * src_step + x * 3;
unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
ushort2 dst;
dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src += 3;
t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
*(ushort2*)(dst_ + y * dst_step + x) = dst;
}
}
__global__ void RGB2Gray_3(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx)
{ {
const float cr = 0.299f; const float cr = 0.299f;
const float cg = 0.587f; const float cg = 0.587f;
const float cb = 0.114f; const float cb = 0.114f;
return src[bidx] * cb + src[1] * cg + src[bidx^2] * cr;
}
};
template <int SRCCN, typename T>
__global__ void RGB2Gray(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
typedef typename TypeVec<T, SRCCN>::vec_t src_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols) if (y < rows && x < cols)
{ {
const float* src = src_ + y * src_step + x * 3; src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
float t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; *(T*)(dst_ + y * dst_step + x * sizeof(T)) = RGB2GrayConvertor<T>::cvt((const T*)(&src), bidx);
*(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr;
}
}
__global__ void RGB2Gray_4(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{
uchar4 src = *(uchar4*)(src_ + y * src_step + (x << 2));
uchar t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
uchar4 dst;
dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src = *(uchar4*)(src_ + y * src_step + (x << 2) + 4);
t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src = *(uchar4*)(src_ + y * src_step + (x << 2) + 8);
t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src = *(uchar4*)(src_ + y * src_step + (x << 2) + 12);
t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
*(uchar4*)(dst_ + y * dst_step + x) = dst;
}
}
__global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)
{
const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{
ushort4 src = *(ushort4*)(src_ + y * src_step + (x << 2));
unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];
ushort2 dst;
dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4);
t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];
dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
*(ushort2*)(dst_ + y * dst_step + x) = dst;
}
}
__global__ void RGB2Gray_4(const float* src_, size_t src_step, float* dst_, size_t dst_step, int rows, int cols, int bidx)
{
const float cr = 0.299f;
const float cg = 0.587f;
const float cb = 0.114f;
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows && x < cols)
{
float4 src = *(float4*)(src_ + y * src_step + (x << 2));
float t0 = ((float*)(&src))[bidx], t1 = src.y, t2 = ((float*)(&src))[bidx ^ 2];
*(dst_ + y * dst_step + x) = t0 * cb + t1 * cg + t2 * cr;
} }
} }
} }
namespace cv { namespace gpu { namespace improc namespace cv { namespace gpu { namespace improc
{ {
void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream) template <typename T, int SRCCN>
{ void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x << 2);
grid.y = divUp(src.rows, threads.y);
switch (srccn)
{
case 3:
imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx);
break;
case 4:
imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(uchar), dst.ptr, dst.step / sizeof(uchar), src.rows, src.cols, bidx);
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
void RGB2Gray_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int bidx, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x << 1);
grid.y = divUp(src.rows, threads.y);
switch (srccn)
{
case 3:
imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);
break;
case 4:
imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
@ -715,23 +557,37 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
switch (srccn) imgproc::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
{ dst.ptr, dst.step, src.rows, src.cols, bidx);
case 3:
imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx);
break;
case 4:
imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(float), dst.ptr, dst.step / sizeof(float), src.rows, src.cols, bidx);
break;
default:
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
break;
}
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<unsigned char, 3>, RGB2Gray_caller<unsigned char, 4>};
RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);
}
void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<unsigned short, 3>, RGB2Gray_caller<unsigned short, 4>};
RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);
}
void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream)
{
typedef void (*RGB2Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<float, 3>, RGB2Gray_caller<float, 4>};
RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);
}
template <int GREEN_BITS> template <int GREEN_BITS>
void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
{ {
@ -784,7 +640,6 @@ namespace imgproc
dst.z = saturate_cast<T>(Cb); dst.z = saturate_cast<T>(Cb);
} }
}; };
template<> struct RGB2YCrCbConverter<float> template<> struct RGB2YCrCbConverter<float>
{ {
typedef typename TypeVec<float, 3>::vec_t dst_t; typedef typename TypeVec<float, 3>::vec_t dst_t;
@ -832,7 +687,6 @@ namespace imgproc
dst[bidx^2] = saturate_cast<T>(r); dst[bidx^2] = saturate_cast<T>(r);
} }
}; };
template <> struct YCrCb2RGBConvertor<float> template <> struct YCrCb2RGBConvertor<float>
{ {
typedef typename TypeVec<float, 3>::vec_t src_t; typedef typename TypeVec<float, 3>::vec_t src_t;
@ -982,185 +836,194 @@ namespace cv { namespace gpu { namespace improc
////////////////////////////////////// RGB <-> XYZ /////////////////////////////////////// ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
//namespace imgproc namespace imgproc
//{ {
// static const float sRGB2XYZ_D65[] = __constant__ float cXYZ_D65f[9];
// { __constant__ int cXYZ_D65i[9];
// 0.412453f, 0.357580f, 0.180423f,
// 0.212671f, 0.715160f, 0.072169f, template <typename T> struct RGB2XYZConvertor
// 0.019334f, 0.119193f, 0.950227f {
// }; typedef typename TypeVec<T, 3>::vec_t dst_t;
// static __device__ dst_t cvt(const T* src)
// static const float XYZ2sRGB_D65[] = {
// { dst_t dst;
// 3.240479f, -1.53715f, -0.498535f,
// -0.969256f, 1.875991f, 0.041556f, dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));
// 0.055648f, -0.204043f, 1.057311f dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));
// }; dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));
//
// template<typename _Tp> struct RGB2XYZ_f return dst;
// { }
// typedef _Tp channel_type; };
// template <> struct RGB2XYZConvertor<float>
// RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) {
// { typedef typename TypeVec<float, 3>::vec_t dst_t;
// memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0])); static __device__ dst_t cvt(const float* src)
// if(blueIdx == 0) {
// { dst_t dst;
// std::swap(coeffs[0], coeffs[2]);
// std::swap(coeffs[3], coeffs[5]); dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
// std::swap(coeffs[6], coeffs[8]); dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
// } dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8];
// }
// void operator()(const _Tp* src, _Tp* dst, int n) const return dst;
// { }
// int scn = srccn; };
// float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], template <int SRCCN, typename T>
// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; __global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
// {
// n *= 3; typedef typename TypeVec<T, SRCCN>::vec_t src_t;
// for(int i = 0; i < n; i += 3, src += scn) typedef typename TypeVec<T, 3>::vec_t dst_t;
// {
// _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2); const int x = blockDim.x * blockIdx.x + threadIdx.x;
// _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5); const int y = blockDim.y * blockIdx.y + threadIdx.y;
// _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
// dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; if (y < rows && x < cols)
// } {
// } src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
// int srccn;
// float coeffs[9]; *(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = RGB2XYZConvertor<T>::cvt((const T*)(&src));
// }; }
// }
// template<typename _Tp> struct RGB2XYZ_i
// { template <typename T> struct XYZ2RGBConvertor
// typedef _Tp channel_type; {
// typedef typename TypeVec<T, 3>::vec_t src_t;
// RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) static __device__ void cvt(const src_t& src, T* dst)
// { {
// static const int coeffs0[] = dst[0] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
// { dst[1] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
// 1689, 1465, 739, dst[2] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
// 871, 2929, 296, }
// 79, 488, 3892 };
// }; template <> struct XYZ2RGBConvertor<float>
// for( int i = 0; i < 9; i++ ) {
// coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; typedef typename TypeVec<float, 3>::vec_t src_t;
// if(blueIdx == 0) static __device__ void cvt(const src_t& src, float* dst)
// { {
// std::swap(coeffs[0], coeffs[2]); dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
// std::swap(coeffs[3], coeffs[5]); dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
// std::swap(coeffs[6], coeffs[8]); dst[2] = src.x * cXYZ_D65f[6] + src.y * cXYZ_D65f[7] + src.z * cXYZ_D65f[8];
// } }
// } };
// void operator()(const _Tp* src, _Tp* dst, int n) const
// { template <int DSTCN, typename T>
// int scn = srccn; __global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], {
// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], typedef typename TypeVec<T, 3>::vec_t src_t;
// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
// n *= 3;
// for(int i = 0; i < n; i += 3, src += scn) const int x = blockDim.x * blockIdx.x + threadIdx.x;
// { const int y = blockDim.y * blockIdx.y + threadIdx.y;
// int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
// int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); if (y < rows && x < cols)
// int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift); {
// dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y); src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T));
// dst[i+2] = saturate_cast<_Tp>(Z);
// } dst_t dst;
// } XYZ2RGBConvertor<T>::cvt(src, (T*)(&dst));
// int srccn; setAlpha(dst, ColorChannel<T>::max());
// int coeffs[9];
// }; *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
// }
// template<typename _Tp> struct XYZ2RGB_f }
// { }
// typedef _Tp channel_type;
// namespace cv { namespace gpu { namespace improc
// XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) {
// : dstcn(_dstcn), blueIdx(_blueIdx) template <typename T, int SRCCN>
// { void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
// memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0])); {
// if(blueIdx == 0) dim3 threads(32, 8, 1);
// { dim3 grid(1, 1, 1);
// std::swap(coeffs[0], coeffs[6]);
// std::swap(coeffs[1], coeffs[7]); grid.x = divUp(src.cols, threads.x);
// std::swap(coeffs[2], coeffs[8]); grid.y = divUp(src.rows, threads.y);
// }
// } imgproc::RGB2XYZ<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
// dst.ptr, dst.step, src.rows, src.cols);
// void operator()(const _Tp* src, _Tp* dst, int n) const
// { if (stream == 0)
// int dcn = dstcn; cudaSafeCall( cudaThreadSynchronize() );
// _Tp alpha = ColorChannel<_Tp>::max(); }
// float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; {
// n *= 3; typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// for(int i = 0; i < n; i += 3, dst += dcn) static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<uchar, 3>, RGB2XYZ_caller<uchar, 4>};
// {
// _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
// _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
// _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8); RGB2XYZ_callers[srccn-3](src, dst, stream);
// dst[0] = B; dst[1] = G; dst[2] = R; }
// if( dcn == 4 )
// dst[3] = alpha; void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream)
// } {
// } typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// int dstcn, blueIdx; static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<unsigned short, 3>, RGB2XYZ_caller<unsigned short, 4>};
// float coeffs[9];
// }; cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
//
// template<typename _Tp> struct XYZ2RGB_i RGB2XYZ_callers[srccn-3](src, dst, stream);
// { }
// typedef _Tp channel_type;
// void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream)
// XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) {
// : dstcn(_dstcn), blueIdx(_blueIdx) typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// { static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<float, 3>, RGB2XYZ_caller<float, 4>};
// static const int coeffs0[] =
// { cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
// 13273, -6296, -2042,
// -3970, 7684, 170, RGB2XYZ_callers[srccn-3](src, dst, stream);
// 228, -836, 4331 }
// };
// for(int i = 0; i < 9; i++) template <typename T, int DSTCN>
// coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
// {
// if(blueIdx == 0) dim3 threads(32, 8, 1);
// { dim3 grid(1, 1, 1);
// std::swap(coeffs[0], coeffs[6]);
// std::swap(coeffs[1], coeffs[7]); grid.x = divUp(src.cols, threads.x);
// std::swap(coeffs[2], coeffs[8]); grid.y = divUp(src.rows, threads.y);
// }
// } imgproc::XYZ2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
// void operator()(const _Tp* src, _Tp* dst, int n) const dst.ptr, dst.step, src.rows, src.cols);
// {
// int dcn = dstcn; if (stream == 0)
// _Tp alpha = ColorChannel<_Tp>::max(); cudaSafeCall( cudaThreadSynchronize() );
// int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], }
// C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
// C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
// n *= 3; {
// for(int i = 0; i < n; i += 3, dst += dcn) typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// { static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<uchar, 3>, XYZ2RGB_caller<uchar, 4>};
// int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
// int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
// int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
// dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G); XYZ2RGB_callers[dstcn-3](src, dst, stream);
// dst[2] = saturate_cast<_Tp>(R); }
// if( dcn == 4 )
// dst[3] = alpha; void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
// } {
// } typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
// int dstcn, blueIdx; static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<unsigned short, 3>, XYZ2RGB_caller<unsigned short, 4>};
// int coeffs[9];
// }; cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
//}
// XYZ2RGB_callers[dstcn-3](src, dst, stream);
//namespace cv { namespace gpu { namespace impl }
//{
//}}} void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
{
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<float, 3>, XYZ2RGB_caller<float, 4>};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
XYZ2RGB_callers[dstcn-3](src, dst, stream);
}
}}}
////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////

View File

@ -51,6 +51,9 @@ using namespace cv::gpu;
void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }
void cv::gpu::sumWindowColumn(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
void cv::gpu::sumWindowRow(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
#else #else
@ -65,9 +68,9 @@ namespace
CV_Assert(kernel.type() == CV_8U && (kernel.cols & 1) != 0 && (kernel.rows & 1) != 0); CV_Assert(kernel.type() == CV_8U && (kernel.cols & 1) != 0 && (kernel.rows & 1) != 0);
if( anchor.x == -1 ) if( anchor.x == -1 )
anchor.x = 0; anchor.x = kernel.cols / 2;
if( anchor.y == -1 ) if( anchor.y == -1 )
anchor.y = 0; anchor.y = kernel.rows / 2;
// in NPP for Cuda 3.1 only such anchor is supported. // in NPP for Cuda 3.1 only such anchor is supported.
CV_Assert(anchor.x == 0 && anchor.y == 0); CV_Assert(anchor.x == 0 && anchor.y == 0);
@ -94,10 +97,16 @@ namespace
anc.y = anchor.y; anc.y = anchor.y;
dst.create(src.size(), src.type()); dst.create(src.size(), src.type());
GpuMat dstBuf;
if (iterations > 1)
dstBuf.create(src.size(), src.type());
nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) ); nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) );
for(int i = 1; i < iterations; ++i) for(int i = 1; i < iterations; ++i)
nppSafeCall( func(dst.ptr<Npp8u>(), dst.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) ); {
dst.swap(dstBuf);
nppSafeCall( func(dstBuf.ptr<Npp8u>(), dstBuf.step, dst.ptr<Npp8u>(), dst.step, sz, gpu_krnl.ptr<Npp8u>(), mask_sz, anc) );
}
} }
} }
@ -154,4 +163,78 @@ void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& k
} }
} }
////////////////////////////////////////////////////////////////////////
// boxFilter
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor)
{
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7);
CV_Assert(ksize.height == ksize.width);
if (anchor.x == -1)
anchor.x = 0;
if (anchor.y == -1)
anchor.y = 0;
CV_Assert(anchor.x == 0 && anchor.y == 0);
dst.create(src.size(), src.type());
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiSize masksz;
masksz.height = ksize.height;
masksz.width = ksize.width;
NppiPoint anc;
anc.x = anchor.x;
anc.y = anchor.y;
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiFilterBox_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );
}
else
{
nppSafeCall( nppiFilterBox_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );
}
}
////////////////////////////////////////////////////////////////////////
// sumWindow Filter
namespace
{
typedef NppStatus (*nppSumWindow_t)(const Npp8u * pSrc, Npp32s nSrcStep,
Npp32f * pDst, Npp32s nDstStep, NppiSize oROI,
Npp32s nMaskSize, Npp32s nAnchor);
inline void sumWindowCaller(nppSumWindow_t func, const GpuMat& src, GpuMat& dst, int ksize, int anchor)
{
CV_Assert(src.type() == CV_8UC1);
if (anchor == -1)
anchor = ksize / 2;
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
dst.create(src.size(), CV_32FC1);
nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );
}
}
void cv::gpu::sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor)
{
sumWindowCaller(nppiSumWindowColumn_8u32f_C1R, src, dst, ksize, anchor);
}
void cv::gpu::sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor)
{
sumWindowCaller(nppiSumWindowRow_8u32f_C1R, src, dst, ksize, anchor);
}
#endif #endif

View File

@ -62,7 +62,6 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); } void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); } void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
@ -88,14 +87,14 @@ namespace cv { namespace gpu
void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream); void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream); void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream);
void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream); void Gray2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream); void Gray2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream); void Gray2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream); void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream);
void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream); void RGB2Gray_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream); void RGB2Gray_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream); void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream); void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);
void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream); void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream);
@ -105,6 +104,14 @@ namespace cv { namespace gpu
void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);
void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream);
void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream);
void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);
} }
}} }}
@ -312,11 +319,11 @@ namespace
bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
if( depth == CV_8U ) if( depth == CV_8U )
improc::RGB2Gray_gpu((DevMem2D)src, scn, (DevMem2D)out, bidx, stream); improc::RGB2Gray_gpu_8u(src, scn, out, bidx, stream);
else if( depth == CV_16U ) else if( depth == CV_16U )
improc::RGB2Gray_gpu((DevMem2D_<unsigned short>)src, scn, (DevMem2D_<unsigned short>)out, bidx, stream); improc::RGB2Gray_gpu_16u(src, scn, out, bidx, stream);
else else
improc::RGB2Gray_gpu((DevMem2Df)src, scn, (DevMem2Df)out, bidx, stream); improc::RGB2Gray_gpu_32f(src, scn, out, bidx, stream);
break; break;
case CV_BGR5652GRAY: case CV_BGR5552GRAY: case CV_BGR5652GRAY: case CV_BGR5552GRAY:
@ -335,11 +342,11 @@ namespace
out.create(sz, CV_MAKETYPE(depth, dcn)); out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U ) if( depth == CV_8U )
improc::Gray2RGB_gpu((DevMem2D)src, (DevMem2D)out, dcn, stream); improc::Gray2RGB_gpu_8u(src, out, dcn, stream);
else if( depth == CV_16U ) else if( depth == CV_16U )
improc::Gray2RGB_gpu((DevMem2D_<unsigned short>)src, (DevMem2D_<unsigned short>)out, dcn, stream); improc::Gray2RGB_gpu_16u(src, out, dcn, stream);
else else
improc::Gray2RGB_gpu((DevMem2Df)src, (DevMem2Df)out, dcn, stream); improc::Gray2RGB_gpu_32f(src, out, dcn, stream);
break; break;
case CV_GRAY2BGR565: case CV_GRAY2BGR555: case CV_GRAY2BGR565: case CV_GRAY2BGR555:
@ -414,34 +421,97 @@ namespace
} }
break; break;
//case CV_BGR2XYZ: case CV_RGB2XYZ: case CV_BGR2XYZ: case CV_RGB2XYZ:
// CV_Assert( scn == 3 || scn == 4 ); {
// bidx = code == CV_BGR2XYZ ? 0 : 2; CV_Assert( scn == 3 || scn == 4 );
//
// dst.create(sz, CV_MAKETYPE(depth, 3));
//
// if( depth == CV_8U )
// CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
// else if( depth == CV_16U )
// CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
// else
// CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
// break;
//case CV_XYZ2BGR: case CV_XYZ2RGB: bidx = code == CV_BGR2XYZ ? 0 : 2;
// if( dcn <= 0 ) dcn = 3;
// CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); static const float RGB2XYZ_D65f[] =
// bidx = code == CV_XYZ2BGR ? 0 : 2; {
// 0.412453f, 0.357580f, 0.180423f,
// dst.create(sz, CV_MAKETYPE(depth, dcn)); 0.212671f, 0.715160f, 0.072169f,
// 0.019334f, 0.119193f, 0.950227f
// if( depth == CV_8U ) };
// CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0)); static const int RGB2XYZ_D65i[] =
// else if( depth == CV_16U ) {
// CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0)); 1689, 1465, 739,
// else 871, 2929, 296,
// CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0)); 79, 488, 3892
// break; };
float coeffs_f[9];
int coeffs_i[9];
::memcpy(coeffs_f, RGB2XYZ_D65f, 9 * sizeof(float));
::memcpy(coeffs_i, RGB2XYZ_D65i, 9 * sizeof(int));
if (bidx == 0)
{
std::swap(coeffs_f[0], coeffs_f[2]);
std::swap(coeffs_f[3], coeffs_f[5]);
std::swap(coeffs_f[6], coeffs_f[8]);
std::swap(coeffs_i[0], coeffs_i[2]);
std::swap(coeffs_i[3], coeffs_i[5]);
std::swap(coeffs_i[6], coeffs_i[8]);
}
out.create(sz, CV_MAKETYPE(depth, 3));
if( depth == CV_8U )
improc::RGB2XYZ_gpu_8u(src, scn, out, coeffs_i, stream);
else if( depth == CV_16U )
improc::RGB2XYZ_gpu_16u(src, scn, out, coeffs_i, stream);
else
improc::RGB2XYZ_gpu_32f(src, scn, out, coeffs_f, stream);
}
break;
case CV_XYZ2BGR: case CV_XYZ2RGB:
{
if (dcn <= 0) dcn = 3;
CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
bidx = code == CV_XYZ2BGR ? 0 : 2;
static const float XYZ2sRGB_D65f[] =
{
3.240479f, -1.53715f, -0.498535f,
-0.969256f, 1.875991f, 0.041556f,
0.055648f, -0.204043f, 1.057311f
};
static const int XYZ2sRGB_D65i[] =
{
13273, -6296, -2042,
-3970, 7684, 170,
228, -836, 4331
};
float coeffs_f[9];
int coeffs_i[9];
::memcpy(coeffs_f, XYZ2sRGB_D65f, 9 * sizeof(float));
::memcpy(coeffs_i, XYZ2sRGB_D65i, 9 * sizeof(int));
if (bidx == 0)
{
std::swap(coeffs_f[0], coeffs_f[6]);
std::swap(coeffs_f[1], coeffs_f[7]);
std::swap(coeffs_f[2], coeffs_f[8]);
std::swap(coeffs_i[0], coeffs_i[6]);
std::swap(coeffs_i[1], coeffs_i[7]);
std::swap(coeffs_i[2], coeffs_i[8]);
}
out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U )
improc::XYZ2RGB_gpu_8u(src, out, dcn, coeffs_i, stream);
else if( depth == CV_16U )
improc::XYZ2RGB_gpu_16u(src, out, dcn, coeffs_i, stream);
else
improc::XYZ2RGB_gpu_32f(src, out, dcn, coeffs_f, stream);
}
break;
//case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: //case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
//case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: //case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
@ -916,42 +986,4 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) ); sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );
} }
////////////////////////////////////////////////////////////////////////
// boxFilter
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor)
{
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
CV_Assert(ksize.height == 3 || ksize.height == 5 || ksize.height == 7);
CV_Assert(ksize.height == ksize.width);
if (anchor.x == -1)
anchor.x = 0;
if (anchor.y == -1)
anchor.y = 0;
CV_Assert(anchor.x == 0 && anchor.y == 0);
dst.create(src.size(), src.type());
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiSize masksz;
masksz.height = ksize.height;
masksz.width = ksize.width;
NppiPoint anc;
anc.x = anchor.x;
anc.y = anchor.y;
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiFilterBox_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );
}
else
{
nppSafeCall( nppiFilterBox_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, srcsz, masksz, anc) );
}
}
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */

View File

@ -133,6 +133,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
else else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
int stype = type();
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
if( sdepth == ddepth && noScale ) if( sdepth == ddepth && noScale )
{ {
@ -146,7 +147,50 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
psrc = &(temp = *this); psrc = &(temp = *this);
dst.create( size(), rtype ); dst.create( size(), rtype );
if (!noScale)
matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta); matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);
else
{
NppiSize sz;
sz.width = cols;
sz.height = rows;
if (stype == CV_8UC1 && ddepth == CV_16U)
nppSafeCall( nppiConvert_8u16u_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );
else if (stype == CV_16UC1 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16u8u_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_8UC4 && ddepth == CV_16U)
nppSafeCall( nppiConvert_8u16u_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );
else if (stype == CV_16UC4 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16u8u_C4R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_8UC1 && ddepth == CV_16S)
nppSafeCall( nppiConvert_8u16s_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );
else if (stype == CV_16SC1 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16s8u_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_8UC4 && ddepth == CV_16S)
nppSafeCall( nppiConvert_8u16s_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );
else if (stype == CV_16SC4 && ddepth == CV_8U)
nppSafeCall( nppiConvert_16s8u_C4R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
else if (stype == CV_16SC1 && ddepth == CV_32F)
nppSafeCall( nppiConvert_16s32f_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
else if (stype == CV_32FC1 && ddepth == CV_16S)
nppSafeCall( nppiConvert_32f16s_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz, NPP_RND_NEAR) );
else if (stype == CV_8UC1 && ddepth == CV_32F)
nppSafeCall( nppiConvert_8u32f_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
else if (stype == CV_32FC1 && ddepth == CV_8U)
nppSafeCall( nppiConvert_32f8u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz, NPP_RND_NEAR) );
else if (stype == CV_16UC1 && ddepth == CV_32F)
nppSafeCall( nppiConvert_16u32f_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
else if (stype == CV_32FC1 && ddepth == CV_16U)
nppSafeCall( nppiConvert_32f16u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz, NPP_RND_NEAR) );
else if (stype == CV_16UC1 && ddepth == CV_32S)
nppSafeCall( nppiConvert_16u32s_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );
else if (stype == CV_16SC1 && ddepth == CV_32S)
nppSafeCall( nppiConvert_16s32s_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );
else
matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), 1.0, 0.0);
}
} }
GpuMat& GpuMat::operator = (const Scalar& s) GpuMat& GpuMat::operator = (const Scalar& s)

View File

@ -47,12 +47,11 @@ const char* blacklist[] =
{ {
"GPU-NppImageSum", // crash "GPU-NppImageSum", // crash
"GPU-MatOperatorAsyncCall", // crash "GPU-MatOperatorAsyncCall", // crash
//"GPU-NppErode", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR) //"GPU-NppErode", // different border interpolation
//"GPU-NppDilate", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR) //"GPU-NppMorphologyEx", // different border interpolation
//"GPU-NppMorphologyEx", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
//"GPU-NppImageDivide", // different round mode //"GPU-NppImageDivide", // different round mode
//"GPU-NppImageMeanStdDev", // different precision //"GPU-NppImageMeanStdDev", // different precision
//"GPU-NppImageMinNax", // npp bug //"GPU-NppImageMinNax", // npp bug - don't find min/max near right border
//"GPU-NppImageResize", // different precision in interpolation //"GPU-NppImageResize", // different precision in interpolation
//"GPU-NppImageWarpAffine", // different precision in interpolation //"GPU-NppImageWarpAffine", // different precision in interpolation
//"GPU-NppImageWarpPerspective", // different precision in interpolation //"GPU-NppImageWarpPerspective", // different precision in interpolation
@ -61,6 +60,7 @@ const char* blacklist[] =
//"GPU-NppImageExp", // different precision //"GPU-NppImageExp", // different precision
//"GPU-NppImageLog", // different precision //"GPU-NppImageLog", // different precision
//"GPU-NppImageMagnitude", // different precision //"GPU-NppImageMagnitude", // different precision
//"GPU-NppImageSumWindow", // different border interpolation
0 0
}; };

View File

@ -451,6 +451,47 @@ struct CV_GpuNppImageBlurTest : public CV_GpuImageProcTest
} }
}; };
////////////////////////////////////////////////////////////////////////////////
// sumWindow
struct CV_GpuNppImageSumWindowTest : public CV_GpuImageProcTest
{
CV_GpuNppImageSumWindowTest() : CV_GpuImageProcTest( "GPU-NppImageSumWindow", "sumWindow" ) {}
int test(const Mat& img)
{
if (img.type() != CV_8UC1)
{
ts->printf(CvTS::LOG, "\nUnsupported type\n");
return CvTS::OK;
}
int ksizes[] = {3, 5, 7};
int ksizes_num = sizeof(ksizes) / sizeof(int);
int test_res = CvTS::OK;
for (int i = 0; i < ksizes_num; ++i)
{
ts->printf(CvTS::LOG, "\nksize = %d\n", ksizes[i]);
Mat cpudst(img.size(), CV_64FC1, Scalar());
cv::Ptr<cv::BaseRowFilter> ft = cv::getRowSumFilter(CV_8UC1, CV_64FC1, ksizes[i], 0);
for (int y = 0; y < img.rows; ++y)
(*ft)(img.ptr(y), cpudst.ptr(y), img.cols, 1);
cpudst.convertTo(cpudst, CV_32F);
GpuMat gpu1(img);
GpuMat gpudst;
cv::gpu::sumWindowRow(gpu1, gpudst, ksizes[i], 0);
if (CheckNorm(cpudst, gpudst) != CvTS::OK)
test_res = CvTS::FAIL_GENERIC;
}
return test_res;
}
};
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// cvtColor // cvtColor
class CV_GpuCvtColorTest : public CvTest class CV_GpuCvtColorTest : public CvTest
@ -501,11 +542,13 @@ void CV_GpuCvtColorTest::run( int )
int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB, int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB,
CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB, CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB,
CV_RGB2YCrCb, CV_YCrCb2BGR, CV_BGR2YUV, CV_YUV2RGB, CV_RGB2YCrCb, CV_YCrCb2BGR, CV_BGR2YUV, CV_YUV2RGB,
CV_RGB2XYZ, CV_XYZ2BGR, CV_BGR2XYZ, CV_XYZ2RGB,
CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY, CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY,
CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY}; CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY};
const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB", const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB",
"CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB", "CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB",
"CV_RGB2YCrCb", "CV_YCrCb2BGR", "CV_BGR2YUV", "CV_YUV2RGB", "CV_RGB2YCrCb", "CV_YCrCb2BGR", "CV_BGR2YUV", "CV_YUV2RGB",
"CV_RGB2XYZ", "CV_XYZ2BGR", "CV_BGR2XYZ", "CV_XYZ2RGB",
"CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY", "CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY",
"CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"}; "CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"};
int codes_num = sizeof(codes) / sizeof(int); int codes_num = sizeof(codes) / sizeof(int);
@ -554,4 +597,5 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;
CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test; CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;
CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test; CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;
CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test; CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;
CV_GpuNppImageSumWindowTest CV_GpuNppImageSumWindow_test;
CV_GpuCvtColorTest CV_GpuCvtColor_test; CV_GpuCvtColorTest CV_GpuCvtColor_test;

View File

@ -134,10 +134,10 @@ protected:
virtual int test(const Mat& img) virtual int test(const Mat& img)
{ {
GpuMat kernel(Mat::ones(3, 3, CV_8U)); GpuMat kernel(Mat::ones(3, 3, CV_8U));
Point anchor(-1, -1); Point anchor(0, 0);
int iters = 3; int iters = 1;
cv::Mat cpuRes; cv::Mat cpuRes, cpuRes1;
cv::erode(img, cpuRes, kernel, anchor, iters); cv::erode(img, cpuRes, kernel, anchor, iters);
GpuMat gpuRes; GpuMat gpuRes;
@ -158,13 +158,13 @@ protected:
virtual int test(const Mat& img) virtual int test(const Mat& img)
{ {
GpuMat kernel(Mat::ones(3, 3, CV_8U)); GpuMat kernel(Mat::ones(3, 3, CV_8U));
Point anchor(-1, -1); Point anchor(0, 0);
int iters = 3; int iters = 1;
cv::Mat cpuRes; cv::Mat cpuRes, cpuRes1;
cv::dilate(img, cpuRes, kernel, anchor, iters); cv::dilate(img, cpuRes, kernel, anchor, iters);
GpuMat gpuRes; GpuMat gpuRes, gpuRes1;
cv::gpu::dilate(GpuMat(img), gpuRes, kernel, anchor, iters); cv::gpu::dilate(GpuMat(img), gpuRes, kernel, anchor, iters);
return CheckNorm(cpuRes, gpuRes); return CheckNorm(cpuRes, gpuRes);
@ -186,8 +186,8 @@ protected:
int num = sizeof(ops)/sizeof(ops[0]); int num = sizeof(ops)/sizeof(ops[0]);
GpuMat kernel(Mat::ones(3, 3, CV_8U)); GpuMat kernel(Mat::ones(3, 3, CV_8U));
Point anchor(-1, -1); Point anchor(0, 0);
int iters = 3; int iters = 1;
for(int i = 0; i < num; ++i) for(int i = 0; i < num; ++i)
{ {

View File

@ -83,8 +83,6 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
const int dst_type = types[j]; const int dst_type = types[j];
cv::RNG rng(*ts->get_rng()); cv::RNG rng(*ts->get_rng());
const double alpha = rng.uniform(0.0, 2.0);
const double beta = rng.uniform(-75.0, 75.0);
Mat cpumatsrc(img_size, src_type); Mat cpumatsrc(img_size, src_type);
rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300)); rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300));
@ -93,8 +91,8 @@ void CV_GpuMatOpConvertToTest::run(int /* start_from */)
Mat cpumatdst; Mat cpumatdst;
GpuMat gpumatdst; GpuMat gpumatdst;
cpumatsrc.convertTo(cpumatdst, dst_type, alpha, beta); cpumatsrc.convertTo(cpumatdst, dst_type);
gpumatsrc.convertTo(gpumatdst, dst_type, alpha, beta); gpumatsrc.convertTo(gpumatdst, dst_type);
double r = norm(cpumatdst, gpumatdst, NORM_INF); double r = norm(cpumatdst, gpumatdst, NORM_INF);
if (r > 1) if (r > 1)