From 8abdb3721fca6e9de98b36cd4c33d54bd78f486c Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 24 Jan 2011 10:11:02 +0000
Subject: [PATCH] added gpu threshold.

---
 doc/gpu_image_processing.tex | 8 +-
 modules/gpu/include/opencv2/gpu/gpu.hpp | 7 +-
 modules/gpu/src/cuda/element_operations.cu | 111 ++++-
 modules/gpu/src/cuda/mathfunc.cu | 2 +-
 modules/gpu/src/cuda/matrix_operations.cu | 176 ++-----
 modules/gpu/src/cuda/matrix_reductions.cu | 2 +-
 modules/gpu/src/cuda/surf.cu | 1 -
 modules/gpu/src/cuda/surf_key_point.h | 54 ---
 modules/gpu/src/cuda/transform.hpp | 130 ------
 modules/gpu/src/cudastream.cpp | 4 +-
 modules/gpu/src/element_operations.cpp | 70 +++
 modules/gpu/src/imgproc_gpu.cpp | 20 -
 modules/gpu/src/matrix_operations.cpp | 6 +-
 .../gpu/src/opencv2/gpu/device/transform.hpp | 433 ++++++++++++++++++
 .../gpu/src/opencv2/gpu/device/vecmath.hpp | 80 +++-
 modules/gpu/src/precomp.hpp | 1 -
 tests/gpu/src/imgproc_gpu.cpp | 33 +-
 17 files changed, 768 insertions(+), 370 deletions(-)
 delete mode 100644 modules/gpu/src/cuda/surf_key_point.h
 delete mode 100644 modules/gpu/src/cuda/transform.hpp
 create mode 100644 modules/gpu/src/opencv2/gpu/device/transform.hpp

diff --git a/doc/gpu_image_processing.tex b/doc/gpu_image_processing.tex
index fe6a69ee8..d4d127077 100644
--- a/doc/gpu_image_processing.tex
+++ b/doc/gpu_image_processing.tex
@@ -314,13 +314,17 @@ See also: \cvCppCross{cvtColor}.
 Applies a fixed-level threshold to each array element.
 
 \cvdefCpp{
-double threshold(const GpuMat\& src, GpuMat\& dst, double thresh);
+double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double maxval, int type);\newline
+double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double maxval, int type, const Stream\& stream);
 }
 
 \begin{description}
-\cvarg{src}{Source array. Supports only \texttt{CV\_32FC1} type.}
+\cvarg{src}{Source array (single-channel; \texttt{CV\_64F} depth is not supported).}
 \cvarg{dst}{Destination array; will have the same size and the same type as \texttt{src}.}
 \cvarg{thresh}{Threshold value.}
+\cvarg{maxval}{Maximum value to use with the \texttt{THRESH\_BINARY} and \texttt{THRESH\_BINARY\_INV} thresholding types.}
+\cvarg{type}{Thresholding type. For details, see \cvCppCross{threshold}. The \texttt{THRESH\_OTSU} thresholding type is not supported.}
+\cvarg{stream}{Stream for the asynchronous version.}
 \end{description}
 
 See also: \cvCppCross{threshold}.

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index f566867da..769d2373e 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -587,9 +587,10 @@ namespace cv
         //! async version
         CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const Stream& stream);
 
-        //! applies fixed threshold to the image.
-        //! Now supports only THRESH_TRUNC threshold type and one channel float source.
-        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh);
+        //! applies fixed threshold to the image
+        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type);
+        //! async version
+        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, const Stream& stream);
 
         //! resizes the image
        //! Supports INTER_NEAREST, INTER_LINEAR

diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
index cc44c647e..4d205255c 100644
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -41,7 +41,8 @@
 //M*/
 
 #include "opencv2/gpu/device/vecmath.hpp"
-#include "transform.hpp"
+#include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/saturate_cast.hpp"
 #include "internal_shared.hpp"
 
 using namespace cv::gpu;
@@ -468,4 +469,112 @@ namespace cv { namespace gpu { namespace mathfunc
     template void max_gpu<int>(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
     template void max_gpu<float>(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
     template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // threshold
+
+    class ThreshOp
+    {
+    public:
+        ThreshOp(float thresh_, float maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
+
+    protected:
+        float thresh;
+        float maxVal;
+    };
+
+    class ThreshBinary : public ThreshOp
+    {
+    public:
+        ThreshBinary(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+
+        template <typename T>
+        __device__ T operator()(const T& src) const
+        {
+            return (float)src > thresh ? saturate_cast<T>(maxVal) : 0;
+        }
+    };
+
+    class ThreshBinaryInv : public ThreshOp
+    {
+    public:
+        ThreshBinaryInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+
+        template <typename T>
+        __device__ T operator()(const T& src) const
+        {
+            return (float)src > thresh ? 0 : saturate_cast<T>(maxVal);
+        }
+    };
+
+    class ThreshTrunc : public ThreshOp
+    {
+    public:
+        ThreshTrunc(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+
+        template <typename T>
+        __device__ T operator()(const T& src) const
+        {
+            return saturate_cast<T>(fmin((float)src, thresh));
+        }
+    };
+
+    class ThreshToZero : public ThreshOp
+    {
+    public:
+        ThreshToZero(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+
+        template <typename T>
+        __device__ T operator()(const T& src) const
+        {
+            return (float)src > thresh ? src : 0;
+        }
+    };
+
+    class ThreshToZeroInv : public ThreshOp
+    {
+    public:
+        ThreshToZeroInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+
+        template <typename T>
+        __device__ T operator()(const T& src) const
+        {
+            return (float)src > thresh ? 0 : src;
+        }
+    };
+
+    template <class Op, typename T>
+    void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
+        cudaStream_t stream)
+    {
+        Op op(thresh, maxVal);
+        transform(src, dst, op, stream);
+    }
+
+    template <typename T>
+    void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
+        cudaStream_t stream)
+    {
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
+            cudaStream_t stream);
+
+        static const caller_t callers[] =
+        {
+            threshold_caller<ThreshBinary, T>,
+            threshold_caller<ThreshBinaryInv, T>,
+            threshold_caller<ThreshTrunc, T>,
+            threshold_caller<ThreshToZero, T>,
+            threshold_caller<ThreshToZeroInv, T>
+        };
+
+        callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
+    }
+
+    template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
 }}}

diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu
index cd7ee6fa7..fc686ee95 100644
--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
@@ -43,7 +43,7 @@
 #include "opencv2/gpu/device/limits_gpu.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/vecmath.hpp"
-#include "transform.hpp"
+#include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"
 
 using namespace cv::gpu;

diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu
index 42e55b3f8..2ec794f65 100644
--- a/modules/gpu/src/cuda/matrix_operations.cu
+++ b/modules/gpu/src/cuda/matrix_operations.cu
@@ -42,6 +42,7 @@
 
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
+#include "opencv2/gpu/device/transform.hpp"
 
 using namespace cv::gpu::device;
 
@@ -55,63 +56,6 @@ namespace cv { namespace gpu { namespace matrix_operations {
     template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
     template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
     template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
-
-    template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
-    struct ReadWriteTraits
-    {
-        enum {shift=1};
-
-        typedef T read_type;
-        typedef DT write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 1, 1>
-    {
-        enum {shift=4};
-
-        typedef char4 read_type;
-        typedef char4 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 2, 1>
-    {
-        enum {shift=4};
-
-        typedef short4 read_type;
-        typedef char4 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 4, 1>
-    {
-        enum {shift=4};
-
-        typedef int4 read_type;
-        typedef char4 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 1, 2>
-    {
-        enum {shift=2};
-
-        typedef char2 read_type;
-        typedef short2 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 2, 2>
-    {
-        enum {shift=2};
-
-        typedef short2 read_type;
-        typedef short2 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 4, 2>
-    {
-        enum {shift=2};
-
-        typedef int2 read_type;
-        typedef short2 write_type;
-    };
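The ReadWriteTraits machinery deleted above (and the hand-rolled convert_to kernel removed further down) is what this patch replaces with a single generic transform primitive plus small per-element functors; the vectorized read/write trick survives, generalized, as the Un/BinReadWriteTraits in the new opencv2/gpu/device/transform.hpp. A minimal standalone sketch of the functor-plus-transform pattern, assuming only the CUDA runtime (ScaleAdd and transformKernel are illustrative names, not code from the patch):

#include <cuda_runtime.h>

// Per-element functor, mirroring the spirit of the Convertor this patch adds:
// dst = alpha * src + beta (the real code additionally saturate_casts the result).
template <typename T, typename D> struct ScaleAdd
{
    double alpha, beta;
    __device__ D operator()(T src) const { return static_cast<D>(alpha * src + beta); }
};

// Generic unary transform: one thread per element, any functor.
template <typename T, typename D, class Op>
__global__ void transformKernel(const T* src, D* dst, int n, Op op)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < n)
        dst[x] = op(src[x]);
}

int main()
{
    const int n = 1024;
    float* d_src = 0;
    unsigned char* d_dst = 0;
    cudaMalloc(&d_src, n * sizeof(float));
    cudaMalloc(&d_dst, n);
    cudaMemset(d_src, 0, n * sizeof(float));  // dummy input for the sketch

    ScaleAdd<float, unsigned char> op = { 2.0, 16.0 };
    transformKernel<<<(n + 255) / 256, 256>>>(d_src, d_dst, n, op);
    cudaDeviceSynchronize();

    cudaFree(d_src);
    cudaFree(d_dst);
    return 0;
}

One kernel now serves every per-element operation (threshold, convertTo, min/max, ...), so adding a new operation reduces to writing a new functor.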
/////////////////////////////////////////////////////////////////////////// ////////////////////////////////// CopyTo ///////////////////////////////// @@ -276,88 +220,64 @@ namespace cv { namespace gpu { namespace matrix_operations { //////////////////////////////// ConvertTo //////////////////////////////// /////////////////////////////////////////////////////////////////////////// - template - __global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta) + template + class Convertor { - typedef typename ReadWriteTraits::read_type read_type; - typedef typename ReadWriteTraits::write_type write_type; - const int shift = ReadWriteTraits::shift; + public: + Convertor(double alpha_, double beta_): alpha(alpha_), beta(beta_) {} - const size_t x = threadIdx.x + blockIdx.x * blockDim.x; - const size_t y = threadIdx.y + blockIdx.y * blockDim.y; - - if (y < height) + __device__ D operator()(const T& src) { - const T* src = (const T*)(srcmat + src_step * y); - DT* dst = (DT*)(dstmat + dst_step * y); - if ((x * shift) + shift - 1 < width) - { - read_type srcn_el = ((read_type*)src)[x]; - write_type dstn_el; - - const T* src1_el = (const T*) &srcn_el; - DT* dst1_el = (DT*) &dstn_el; - - for (int i = 0; i < shift; ++i) - dst1_el[i] = saturate_cast
(alpha * src1_el[i] + beta); - - ((write_type*)dst)[x] = dstn_el; - } - else - { - for (int i = 0; i < shift - 1; ++i) - if ((x * shift) + i < width) - dst[(x * shift) + i] = saturate_cast
(alpha * src[(x * shift) + i] + beta); - } + return saturate_cast(alpha * src + beta); } - } - typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream); - - template - void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream) + private: + double alpha, beta; + }; + + template + void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream) { - const int shift = ReadWriteTraits::shift; - - dim3 block(32, 8); - dim3 grid(divUp(width, block.x * shift), divUp(height, block.y)); - - convert_to<<>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta); - if (stream == 0) - cudaSafeCall( cudaThreadSynchronize() ); + Convertor op(alpha, beta); + transform((DevMem2D_)src, (DevMem2D_)dst, op, stream); } - void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream) + void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, + cudaStream_t stream = 0) { - static CvtFunc tab[8][8] = - { - {cvt_, cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, 0}, + typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, + cudaStream_t stream); - {cvt_, cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, 0}, - - {cvt_, cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, 0}, - - {cvt_, cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, 0}, - - {cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, cvt_, 0}, - - {cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, cvt_, 0}, - - {cvt_, cvt_, cvt_, - cvt_, cvt_, cvt_, cvt_, 0}, - - {0,0,0,0,0,0,0,0} + static const caller_t tab[8][8] = + { + {cvt_, cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, 0}, + + {cvt_, cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, 0}, + + {cvt_, cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, 0}, + + {cvt_, cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, 0}, + + {cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, cvt_, 0}, + + {cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, cvt_, 0}, + + {cvt_, cvt_, cvt_, + cvt_, cvt_, cvt_, cvt_, 0}, + + {0,0,0,0,0,0,0,0} }; - CvtFunc func = tab[sdepth][ddepth]; - if (func == 0) - cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__); - func(src, dst, src.cols * channels, src.rows, alpha, beta, stream); + caller_t func = tab[sdepth][ddepth]; + if (!func) + cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__); + + func(src, dst, alpha, beta, stream); } }}} diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index c8d516a35..6dd7dabb5 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -43,7 +43,7 @@ #include "opencv2/gpu/device/limits_gpu.hpp" #include "opencv2/gpu/device/saturate_cast.hpp" #include "opencv2/gpu/device/vecmath.hpp" -#include "transform.hpp" +#include "opencv2/gpu/device/transform.hpp" #include "internal_shared.hpp" using namespace cv::gpu; diff --git a/modules/gpu/src/cuda/surf.cu b/modules/gpu/src/cuda/surf.cu index a6aef8a35..00f62d475 100644 --- a/modules/gpu/src/cuda/surf.cu +++ b/modules/gpu/src/cuda/surf.cu @@ -46,7 +46,6 @@ //M*/ #include "internal_shared.hpp" -#include "surf_key_point.h" #include "opencv2/gpu/device/limits_gpu.hpp" using namespace cv::gpu; diff --git a/modules/gpu/src/cuda/surf_key_point.h b/modules/gpu/src/cuda/surf_key_point.h deleted file mode 100644 index 31370042e..000000000 --- 
a/modules/gpu/src/cuda/surf_key_point.h +++ /dev/null @@ -1,54 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_SURF_KEY_POINT_H__ -#define __OPENCV_SURF_KEY_POINT_H__ - -namespace cv -{ - namespace gpu - { - - } -} - -#endif // __OPENCV_SURF_KEY_POINT_H__ diff --git a/modules/gpu/src/cuda/transform.hpp b/modules/gpu/src/cuda/transform.hpp deleted file mode 100644 index b8f066e6e..000000000 --- a/modules/gpu/src/cuda/transform.hpp +++ /dev/null @@ -1,130 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_TRANSFORM_HPP__ -#define __OPENCV_GPU_TRANSFORM_HPP__ - -#include "internal_shared.hpp" - -namespace cv { namespace gpu { namespace device -{ - //! Mask accessor - template struct MaskReader_ - { - PtrStep_ mask; - explicit MaskReader_(PtrStep_ mask): mask(mask) {} - - __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; } - }; - - //! Stub mask accessor - struct NoMask - { - __device__ bool operator()(int y, int x) const { return true; } - }; - - //! Transform kernels - - template - static __global__ void transform(const DevMem2D_ src, PtrStep_ dst, const Mask mask, UnOp op) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < src.cols && y < src.rows && mask(y, x)) - { - T src_data = src.ptr(y)[x]; - dst.ptr(y)[x] = op(src_data); - } - } - - template - static __global__ void transform(const DevMem2D_ src1, const PtrStep_ src2, PtrStep_ dst, const Mask mask, BinOp op) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < src1.cols && y < src1.rows && mask(y, x)) - { - T1 src1_data = src1.ptr(y)[x]; - T2 src2_data = src2.ptr(y)[x]; - dst.ptr(y)[x] = op(src1_data, src2_data); - } - } -}}} - -namespace cv -{ - namespace gpu - { - template - static void transform(const DevMem2D_& src, const DevMem2D_& dst, UnOp op, cudaStream_t stream) - { - dim3 threads(16, 16, 1); - dim3 grid(1, 1, 1); - - grid.x = divUp(src.cols, threads.x); - grid.y = divUp(src.rows, threads.y); - - device::transform<<>>(src, dst, device::NoMask(), op); - - if (stream == 0) - cudaSafeCall( cudaThreadSynchronize() ); - } - template - static void transform(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, BinOp op, cudaStream_t stream) - { - dim3 threads(16, 16, 1); - dim3 grid(1, 1, 1); - - grid.x = divUp(src1.cols, threads.x); - grid.y = divUp(src1.rows, threads.y); - - device::transform<<>>(src1, src2, dst, device::NoMask(), op); - - if (stream == 0) - cudaSafeCall( cudaThreadSynchronize() ); - } - } -} - -#endif // __OPENCV_GPU_TRANSFORM_HPP__ diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp index 79abe4a65..4965f2527 100644 --- a/modules/gpu/src/cudastream.cpp +++ b/modules/gpu/src/cudastream.cpp @@ -80,7 +80,7 @@ 
namespace cv
         void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
         void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
 
-        void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
+        void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
     }
   }
 }
@@ -204,7 +204,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
     psrc = &(temp = src);
 
     dst.create( src.size(), rtype );
 
-    matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta, impl->stream);
+    matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
 }

diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp
index 4d61cc353..2c88722f5 100644
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@@ -74,6 +74,8 @@ void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
 void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
 void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
+double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int) {throw_nogpu(); return 0.0;}
+double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, const Stream&) {throw_nogpu(); return 0.0;}
 
 #else
 
@@ -696,4 +698,72 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& st
     funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
 }
 
+////////////////////////////////////////////////////////////////////////
+// threshold
+
+namespace cv { namespace gpu { namespace mathfunc
+{
+    template <typename T>
+    void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
+        cudaStream_t stream);
+}}}
+
+namespace
+{
+    void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type,
+        cudaStream_t stream = 0)
+    {
+        using namespace cv::gpu::mathfunc;
+
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
+            cudaStream_t stream);
+
+        static const caller_t callers[] =
+        {
+            threshold_gpu<uchar>, threshold_gpu<schar>,
+            threshold_gpu<ushort>, threshold_gpu<short>, threshold_gpu<int>, threshold_gpu<float>, 0
+        };
+
+        CV_Assert(src.channels() == 1 && src.depth() < CV_64F);
+        CV_Assert(type <= THRESH_TOZERO_INV);
+
+        dst.create(src.size(), src.type());
+
+        if (src.depth() != CV_32F)
+        {
+            thresh = cvFloor(thresh);
+            maxVal = cvRound(maxVal);
+        }
+
+        callers[src.depth()](src, dst, static_cast<float>(thresh), static_cast<float>(maxVal), type, stream);
+    }
+}
+
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type)
+{
+    if (src.type() == CV_32FC1 && type == THRESH_TRUNC)
+    {
+        dst.create(src.size(), src.type());
+
+        NppiSize sz;
+        sz.width = src.cols;
+        sz.height = src.rows;
+
+        nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
+            dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
+    }
+    else
+    {
+        threshold_caller(src, dst, thresh, maxVal, type);
+    }
+
+    return thresh;
+}
+
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, const Stream& stream)
+{
+    threshold_caller(src, dst, thresh, maxVal, type, StreamAccessor::getStream(stream));
+    return thresh;
+}
+
 #endif
\ No newline at end of file

diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 3b866c945..693aa56ab 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -55,7 +55,6 @@ void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
 void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, const Stream&) { throw_nogpu(); }
 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&) { throw_nogpu(); }
 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, const Stream&) { throw_nogpu(); }
-double cv::gpu::threshold(const GpuMat&, GpuMat&, double) { throw_nogpu(); return 0.0; }
 void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int) { throw_nogpu(); }
 void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&) { throw_nogpu(); }
 void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
@@ -241,25 +240,6 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
     reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));
 }
 
-////////////////////////////////////////////////////////////////////////
-// threshold
-
-double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
-{
-    CV_Assert(src.type() == CV_32FC1);
-
-    dst.create( src.size(), src.type() );
-
-    NppiSize sz;
-    sz.width = src.cols;
-    sz.height = src.rows;
-
-    nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
-        dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
-
-    return thresh;
-}
-
 ////////////////////////////////////////////////////////////////////////
 // resize

diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp
index e2a88cc03..887f64adb 100644
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -90,7 +90,7 @@ namespace cv
             void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
             void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
 
-            void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
+            void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
         }
     }
 }
@@ -193,7 +193,7 @@ namespace
     void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
     {
-        matrix_operations::convert_to(src, src.depth(), dst, dst.depth(), src.channels(), 1.0, 0.0);
+        matrix_operations::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
     }
 }
 
@@ -222,7 +222,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
     dst.create( size(), rtype );
 
     if (!noScale)
-        matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);
+        matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta);
     else
     {
         typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst);

diff --git a/modules/gpu/src/opencv2/gpu/device/transform.hpp b/modules/gpu/src/opencv2/gpu/device/transform.hpp
new file mode 100644
index 000000000..959cca235
--- /dev/null
+++
b/modules/gpu/src/opencv2/gpu/device/transform.hpp @@ -0,0 +1,433 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_TRANSFORM_HPP__ +#define __OPENCV_GPU_TRANSFORM_HPP__ + +#include "internal_shared.hpp" +#include "vecmath.hpp" + +namespace cv { namespace gpu { namespace device +{ + //! Mask accessor + + class MaskReader + { + public: + explicit MaskReader(const PtrStep& mask_): mask(mask_) {} + + __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; } + + private: + PtrStep mask; + }; + + struct NoMask + { + __device__ bool operator()(int y, int x) const { return true; } + }; + + //! Read Write Traits + + template + struct UnReadWriteTraits_ + { + enum {shift=1}; + }; + template + struct UnReadWriteTraits_ + { + enum {shift=4}; + }; + template + struct UnReadWriteTraits_ + { + enum {shift=2}; + }; + template struct UnReadWriteTraits + { + enum {shift=UnReadWriteTraits_::shift}; + + typedef typename TypeVec::vec_t read_type; + typedef typename TypeVec::vec_t write_type; + }; + + template + struct BinReadWriteTraits_ + { + enum {shift=1}; + }; + template + struct BinReadWriteTraits_ + { + enum {shift=4}; + }; + template + struct BinReadWriteTraits_ + { + enum {shift=2}; + }; + template struct BinReadWriteTraits + { + enum {shift=BinReadWriteTraits_::shift}; + + typedef typename TypeVec::vec_t read_type1; + typedef typename TypeVec::vec_t read_type2; + typedef typename TypeVec::vec_t write_type; + }; + + //! 
Transform kernels + + template struct OpUnroller; + template <> struct OpUnroller<1> + { + template + static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + } + + template + static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + } + }; + template <> struct OpUnroller<2> + { + template + static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + } + + template + static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + } + }; + template <> struct OpUnroller<3> + { + template + static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src.z); + } + + template + static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src1.z, src2.z); + } + }; + template <> struct OpUnroller<4> + { + template + static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src.z); + if (mask(y, x_shifted + 3)) + dst.w = op(src.w); + } + + template + static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src1.z, src2.z); + if (mask(y, x_shifted + 3)) + dst.w = op(src1.w, src2.w); + } + }; + + template + __global__ static void transformSmart(const DevMem2D_ src_, PtrStep_ dst_, const Mask mask, UnOp op) + { + typedef typename UnReadWriteTraits::read_type read_type; + typedef typename UnReadWriteTraits::write_type write_type; + const int shift = UnReadWriteTraits::shift; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int x_shifted = x * shift; + + if (y < src_.rows) + { + const T* src = src_.ptr(y); + D* dst = dst_.ptr(y); + + if (x_shifted + shift - 1 < src_.cols) + { + read_type src_n_el = ((const read_type*)src)[x]; + write_type dst_n_el; + + OpUnroller::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y); + + ((write_type*)dst)[x] = dst_n_el; + } + else + { + for (int real_x = x_shifted; real_x < src_.cols; ++real_x) + { + if (mask(y, real_x)) + dst[real_x] = op(src[real_x]); + } + } + } + } + + template + static __global__ void transformSimple(const DevMem2D_ src, PtrStep_ dst, const Mask mask, UnOp op) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < src.cols && y < src.rows && mask(y, x)) + { + 
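            // transformSimple (unary): one thread per element, with bounds and
            // mask checks. Contrast with transformSmart above, which reads
            // UnReadWriteTraits<T>::shift elements per thread through a single
            // vector load (e.g. char4) and falls back to a scalar loop for the
            // ragged end of each row.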
dst.ptr(y)[x] = op(src.ptr(y)[x]); + } + } + + template + __global__ static void transformSmart(const DevMem2D_ src1_, const PtrStep_ src2_, PtrStep_ dst_, + const Mask mask, BinOp op) + { + typedef typename BinReadWriteTraits::read_type1 read_type1; + typedef typename BinReadWriteTraits::read_type2 read_type2; + typedef typename BinReadWriteTraits::write_type write_type; + const int shift = BinReadWriteTraits::shift; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int x_shifted = x * shift; + + if (y < src1_.rows) + { + const T1* src1 = src1_.ptr(y); + const T2* src2 = src2_.ptr(y); + D* dst = dst_.ptr(y); + + if (x_shifted + shift - 1 < src1_.cols) + { + read_type1 src1_n_el = ((const read_type1*)src1)[x]; + read_type2 src2_n_el = ((const read_type2*)src2)[x]; + write_type dst_n_el; + + OpUnroller::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y); + + ((write_type*)dst)[x] = dst_n_el; + } + else + { + for (int real_x = x_shifted; real_x < src1_.cols; ++real_x) + { + if (mask(y, real_x)) + dst[real_x] = op(src1[real_x], src2[real_x]); + } + } + } + } + + template + static __global__ void transformSimple(const DevMem2D_ src1, const PtrStep_ src2, PtrStep_ dst, + const Mask mask, BinOp op) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < src1.cols && y < src1.rows && mask(y, x)) + { + T1 src1_data = src1.ptr(y)[x]; + T2 src2_data = src2.ptr(y)[x]; + dst.ptr(y)[x] = op(src1_data, src2_data); + } + } +}}} + +namespace cv +{ + namespace gpu + { + template struct TransformChooser; + template<> struct TransformChooser + { + template + static void call(const DevMem2D_& src, const DevMem2D_& dst, UnOp op, const Mask& mask, + cudaStream_t stream = 0) + { + dim3 threads(16, 16, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(src.cols, threads.x); + grid.y = divUp(src.rows, threads.y); + + device::transformSimple<<>>(src, dst, mask, op); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); + } + + template + static void call(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, + BinOp op, const Mask& mask, cudaStream_t stream = 0) + { + dim3 threads(16, 16, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(src1.cols, threads.x); + grid.y = divUp(src1.rows, threads.y); + + device::transformSimple<<>>(src1, src2, dst, mask, op); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); + } + }; + template<> struct TransformChooser + { + template + static void call(const DevMem2D_& src, const DevMem2D_& dst, UnOp op, const Mask& mask, + cudaStream_t stream = 0) + { + const int shift = device::UnReadWriteTraits::shift; + + dim3 threads(16, 16, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(src.cols, threads.x * shift); + grid.y = divUp(src.rows, threads.y); + + device::transformSmart<<>>(src, dst, mask, op); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); + } + + template + static void call(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, + BinOp op, const Mask& mask, cudaStream_t stream = 0) + { + const int shift = device::BinReadWriteTraits::shift; + + dim3 threads(16, 16, 1); + dim3 grid(1, 1, 1); + + grid.x = divUp(src1.cols, threads.x * shift); + grid.y = divUp(src1.rows, threads.y); + + device::transformSmart<<>>(src1, src2, dst, mask, op); + + if (stream == 0) + cudaSafeCall( cudaThreadSynchronize() ); + } + }; + + template + static void transform_caller(const DevMem2D_& src, const 
DevMem2D_& dst, UnOp op, const Mask& mask, + cudaStream_t stream = 0) + { + TransformChooser::cn == 1 && device::VecTraits::cn == 1 && device::UnReadWriteTraits::shift != 1>::call(src, dst, op, mask, stream); + } + + template + static void transform(const DevMem2D_& src, const DevMem2D_& dst, UnOp op, cudaStream_t stream = 0) + { + transform_caller(src, dst, op, device::NoMask(), stream); + } + template + static void transform(const DevMem2D_& src, const DevMem2D_& dst, const PtrStep& mask, UnOp op, + cudaStream_t stream = 0) + { + transform_caller(src, dst, op, device::MaskReader(mask), stream); + } + + template + static void transform_caller(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, + BinOp op, const Mask& mask, cudaStream_t stream = 0) + { + TransformChooser::cn == 1 && device::VecTraits::cn == 1 && device::VecTraits::cn == 1 && device::BinReadWriteTraits::shift != 1>::call(src1, src2, dst, op, mask, stream); + } + + template + static void transform(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, + BinOp op, cudaStream_t stream = 0) + { + transform_caller(src1, src2, dst, op, device::NoMask(), stream); + } + template + static void transform(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, + const PtrStep& mask, BinOp op, cudaStream_t stream = 0) + { + transform_caller(src1, src2, dst, op, device::MaskReader(mask), stream); + } + } +} + +#endif // __OPENCV_GPU_TRANSFORM_HPP__ diff --git a/modules/gpu/src/opencv2/gpu/device/vecmath.hpp b/modules/gpu/src/opencv2/gpu/device/vecmath.hpp index dc04203e5..d34efe8bf 100644 --- a/modules/gpu/src/opencv2/gpu/device/vecmath.hpp +++ b/modules/gpu/src/opencv2/gpu/device/vecmath.hpp @@ -64,12 +64,16 @@ namespace cv template<> struct TypeVec { typedef uchar4 vec_t; }; template<> struct TypeVec { typedef char vec_t; }; + template<> struct TypeVec { typedef char vec_t; }; template<> struct TypeVec { typedef char1 vec_t; }; template<> struct TypeVec { typedef char2 vec_t; }; + template<> struct TypeVec { typedef char2 vec_t; }; template<> struct TypeVec { typedef char2 vec_t; }; template<> struct TypeVec { typedef char3 vec_t; }; + template<> struct TypeVec { typedef char3 vec_t; }; template<> struct TypeVec { typedef char3 vec_t; }; template<> struct TypeVec { typedef char4 vec_t; }; + template<> struct TypeVec { typedef char4 vec_t; }; template<> struct TypeVec { typedef char4 vec_t; }; template<> struct TypeVec { typedef ushort vec_t; }; @@ -117,6 +121,15 @@ namespace cv template<> struct TypeVec { typedef float4 vec_t; }; template<> struct TypeVec { typedef float4 vec_t; }; + template<> struct TypeVec { typedef double vec_t; }; + template<> struct TypeVec { typedef double1 vec_t; }; + template<> struct TypeVec { typedef double2 vec_t; }; + template<> struct TypeVec { typedef double2 vec_t; }; + template<> struct TypeVec { typedef double3 vec_t; }; + template<> struct TypeVec { typedef double3 vec_t; }; + template<> struct TypeVec { typedef double4 vec_t; }; + template<> struct TypeVec { typedef double4 vec_t; }; + template struct VecTraits; template<> struct VecTraits @@ -162,33 +175,40 @@ namespace cv static __device__ __host__ char all(char v) {return v;} static __device__ __host__ char make(char x) {return x;} }; + template<> struct VecTraits + { + typedef schar elem_t; + enum {cn=1}; + static __device__ __host__ schar all(schar v) {return v;} + static __device__ __host__ schar make(schar x) {return x;} + }; template<> struct VecTraits { - typedef char elem_t; + typedef schar elem_t; 
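        // elem_t is schar (signed char) rather than plain char: CUDA's
        // make_char1..make_char4 helpers take signed char, and plain char's
        // signedness is implementation-defined, which is why this patch adds
        // separate schar specializations of TypeVec/VecTraits alongside the
        // char ones.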
enum {cn=1}; - static __device__ __host__ char1 all(char v) {return make_char1(v);} - static __device__ __host__ char1 make(char x) {return make_char1(x);} + static __device__ __host__ char1 all(schar v) {return make_char1(v);} + static __device__ __host__ char1 make(schar x) {return make_char1(x);} }; template<> struct VecTraits { - typedef char elem_t; + typedef schar elem_t; enum {cn=2}; - static __device__ __host__ char2 all(char v) {return make_char2(v, v);} - static __device__ __host__ char2 make(char x, char y) {return make_char2(x, y);} + static __device__ __host__ char2 all(schar v) {return make_char2(v, v);} + static __device__ __host__ char2 make(schar x, schar y) {return make_char2(x, y);} }; template<> struct VecTraits { - typedef char elem_t; + typedef schar elem_t; enum {cn=3}; - static __device__ __host__ char3 all(char v) {return make_char3(v, v, v);} - static __device__ __host__ char3 make(char x, char y, char z) {return make_char3(x, y, z);} + static __device__ __host__ char3 all(schar v) {return make_char3(v, v, v);} + static __device__ __host__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);} }; template<> struct VecTraits { - typedef char elem_t; + typedef schar elem_t; enum {cn=4}; - static __device__ __host__ char4 all(char v) {return make_char4(v, v, v, v);} - static __device__ __host__ char4 make(char x, char y, char z, char w) {return make_char4(x, y, z, w);} + static __device__ __host__ char4 all(schar v) {return make_char4(v, v, v, v);} + static __device__ __host__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);} }; template<> struct VecTraits @@ -371,6 +391,42 @@ namespace cv static __device__ __host__ float4 make(float x, float y, float z, float w) {return make_float4(x, y, z, w);} }; + template<> struct VecTraits + { + typedef double elem_t; + enum {cn=1}; + static __device__ __host__ double all(double v) {return v;} + static __device__ __host__ double make(double x) {return x;} + }; + template<> struct VecTraits + { + typedef double elem_t; + enum {cn=1}; + static __device__ __host__ double1 all(double v) {return make_double1(v);} + static __device__ __host__ double1 make(double x) {return make_double1(x);} + }; + template<> struct VecTraits + { + typedef double elem_t; + enum {cn=2}; + static __device__ __host__ double2 all(double v) {return make_double2(v, v);} + static __device__ __host__ double2 make(double x, double y) {return make_double2(x, y);} + }; + template<> struct VecTraits + { + typedef double elem_t; + enum {cn=3}; + static __device__ __host__ double3 all(double v) {return make_double3(v, v, v);} + static __device__ __host__ double3 make(double x, double y, double z) {return make_double3(x, y, z);} + }; + template<> struct VecTraits + { + typedef double elem_t; + enum {cn=4}; + static __device__ __host__ double4 all(double v) {return make_double4(v, v, v, v);} + static __device__ __host__ double4 make(double x, double y, double z, double w) {return make_double4(x, y, z, w);} + }; + template struct SatCast; template struct SatCast<1, VecD> { diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index e0f2e004e..624e67fce 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -70,7 +70,6 @@ #include "opencv2/gpu/stream_accessor.hpp" #include "npp.h" #include "npp_staging.h" - #include "surf_key_point.h" #include "nvidia/NCV.hpp" #include "nvidia/NCVHaarObjectDetection.hpp" diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index 
7766d80da..d3affcf17 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -180,30 +180,41 @@ void CV_GpuImageProcTest::run( int ) //////////////////////////////////////////////////////////////////////////////// // threshold -struct CV_GpuNppImageThresholdTest : public CV_GpuImageProcTest +struct CV_GpuImageThresholdTest : public CV_GpuImageProcTest { public: - CV_GpuNppImageThresholdTest() : CV_GpuImageProcTest( "GPU-NppImageThreshold", "threshold" ) {} + CV_GpuImageThresholdTest() : CV_GpuImageProcTest( "GPU-ImageThreshold", "threshold" ) {} int test(const Mat& img) { - if (img.type() != CV_32FC1) + if (img.type() != CV_8UC1 && img.type() != CV_32FC1) { ts->printf(CvTS::LOG, "\nUnsupported type\n"); return CvTS::OK; } + const double maxVal = img.type() == CV_8UC1 ? 255 : 1.0; + cv::RNG rng(*ts->get_rng()); - const double thresh = rng; - cv::Mat cpuRes; - cv::threshold(img, cpuRes, thresh, 0.0, THRESH_TRUNC); + int res = CvTS::OK; - GpuMat gpu1(img); - GpuMat gpuRes; - cv::gpu::threshold(gpu1, gpuRes, thresh); + for (int type = THRESH_BINARY; type <= THRESH_TOZERO_INV; ++type) + { + const double thresh = rng.uniform(0.0, maxVal); - return CheckNorm(cpuRes, gpuRes); + cv::Mat cpuRes; + cv::threshold(img, cpuRes, thresh, maxVal, type); + + GpuMat gpu1(img); + GpuMat gpuRes; + cv::gpu::threshold(gpu1, gpuRes, thresh, maxVal, type); + + if (CheckNorm(cpuRes, gpuRes) != CvTS::OK) + res = CvTS::FAIL_GENERIC; + } + + return res; } }; @@ -822,7 +833,7 @@ struct CV_GpuColumnSumTest: CvTest // Placing all test definitions in one place // makes us know about what tests are commented. -CV_GpuNppImageThresholdTest CV_GpuNppImageThreshold_test; +CV_GpuImageThresholdTest CV_GpuImageThreshold_test; CV_GpuNppImageResizeTest CV_GpuNppImageResize_test; CV_GpuNppImageCopyMakeBorderTest CV_GpuNppImageCopyMakeBorder_test; CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;
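With this patch applied, cv::gpu::threshold grows from a THRESH_TRUNC-only wrapper over NPP for CV_32FC1 into all five OpenCV threshold types on any single-channel image of non-double depth, plus an asynchronous overload. A minimal usage sketch of the new API (not part of the patch; the image file names are placeholders):

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat src = cv::imread("input.png", 0); // load as 8-bit, single channel
    cv::gpu::GpuMat d_src(src), d_dst;

    // Synchronous overload: binarize at 128, writing 255 above the threshold.
    cv::gpu::threshold(d_src, d_dst, 128.0, 255.0, cv::THRESH_BINARY);

    // Asynchronous overload: the same call queued on a stream.
    cv::gpu::Stream stream;
    cv::gpu::threshold(d_src, d_dst, 128.0, 255.0, cv::THRESH_TOZERO, stream);
    stream.waitForCompletion();

    cv::Mat dst;
    d_dst.download(dst);
    cv::imwrite("output.png", dst);
    return 0;
}

Note the host-side threshold_caller: double input is rejected by CV_Assert, and thresh/maxVal are floored/rounded for integer depths, matching the CPU cv::threshold semantics.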