From 1b00a3ed54f1ce2000418229357e04ddebaaadcd Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 18 Apr 2013 10:35:54 +0400 Subject: [PATCH 01/49] gpucodec module for video decoding/encoding --- cmake/OpenCVDetectCUDA.cmake | 3 + modules/gpu/CMakeLists.txt | 13 - modules/gpu/doc/video.rst | 447 ------------------ modules/gpu/include/opencv2/gpu.hpp | 205 -------- modules/gpu/perf/perf_video.cpp | 100 ---- modules/gpu/src/cuda/NV12ToARGB.cu | 201 -------- modules/gpu/src/cuda/rgb_to_yv12.cu | 175 ------- modules/gpu/src/precomp.hpp | 14 - modules/gpu/src/thread_wrappers.cpp | 254 ---------- modules/gpu/src/video_decoder.h | 116 ----- modules/gpucodec/CMakeLists.txt | 29 ++ modules/gpucodec/doc/gpucodec.rst | 9 + modules/gpucodec/doc/videodec.rst | 234 +++++++++ modules/gpucodec/doc/videoenc.rst | 219 +++++++++ modules/gpucodec/include/opencv2/gpucodec.hpp | 265 +++++++++++ modules/gpucodec/perf/perf_main.cpp | 47 ++ modules/gpucodec/perf/perf_precomp.cpp | 43 ++ modules/gpucodec/perf/perf_precomp.hpp | 64 +++ modules/gpucodec/perf/perf_video.cpp | 162 +++++++ modules/gpucodec/src/cuda/nv12_to_rgb.cu | 193 ++++++++ modules/gpucodec/src/cuda/rgb_to_yv12.cu | 170 +++++++ .../src/cuvid_video_source.cpp | 11 +- .../src/cuvid_video_source.h | 66 ++- .../src/ffmpeg_video_source.cpp | 22 +- .../src/ffmpeg_video_source.h | 56 +-- modules/{gpu => gpucodec}/src/frame_queue.cpp | 10 +- modules/{gpu => gpucodec}/src/frame_queue.h | 76 ++- modules/gpucodec/src/precomp.cpp | 43 ++ modules/gpucodec/src/precomp.hpp | 79 ++++ modules/gpucodec/src/thread.cpp | 174 +++++++ .../src/thread.h} | 71 +-- .../{gpu => gpucodec}/src/video_decoder.cpp | 7 +- modules/gpucodec/src/video_decoder.h | 111 +++++ .../{gpu => gpucodec}/src/video_parser.cpp | 6 +- modules/{gpu => gpucodec}/src/video_parser.h | 84 ++-- .../{gpu => gpucodec}/src/video_reader.cpp | 57 +-- .../{gpu => gpucodec}/src/video_writer.cpp | 44 +- modules/gpucodec/test/test_main.cpp | 45 ++ modules/gpucodec/test/test_precomp.cpp | 43 ++ modules/gpucodec/test/test_precomp.hpp | 60 +++ modules/{gpu => gpucodec}/test/test_video.cpp | 54 +-- modules/superres/CMakeLists.txt | 2 +- modules/superres/src/frame_source.cpp | 6 +- modules/superres/src/precomp.hpp | 4 + samples/gpu/CMakeLists.txt | 3 +- samples/gpu/video_reader.cpp | 8 +- samples/gpu/video_writer.cpp | 8 +- 47 files changed, 2247 insertions(+), 1866 deletions(-) delete mode 100644 modules/gpu/src/cuda/NV12ToARGB.cu delete mode 100644 modules/gpu/src/cuda/rgb_to_yv12.cu delete mode 100644 modules/gpu/src/thread_wrappers.cpp delete mode 100644 modules/gpu/src/video_decoder.h create mode 100644 modules/gpucodec/CMakeLists.txt create mode 100644 modules/gpucodec/doc/gpucodec.rst create mode 100644 modules/gpucodec/doc/videodec.rst create mode 100644 modules/gpucodec/doc/videoenc.rst create mode 100644 modules/gpucodec/include/opencv2/gpucodec.hpp create mode 100644 modules/gpucodec/perf/perf_main.cpp create mode 100644 modules/gpucodec/perf/perf_precomp.cpp create mode 100644 modules/gpucodec/perf/perf_precomp.hpp create mode 100644 modules/gpucodec/perf/perf_video.cpp create mode 100644 modules/gpucodec/src/cuda/nv12_to_rgb.cu create mode 100644 modules/gpucodec/src/cuda/rgb_to_yv12.cu rename modules/{gpu => gpucodec}/src/cuvid_video_source.cpp (96%) rename modules/{gpu => gpucodec}/src/cuvid_video_source.h (61%) rename modules/{gpu => gpucodec}/src/ffmpeg_video_source.cpp (94%) rename modules/{gpu => gpucodec}/src/ffmpeg_video_source.h (69%) rename modules/{gpu => gpucodec}/src/frame_queue.cpp (94%) rename modules/{gpu => gpucodec}/src/frame_queue.h (55%) create mode 100644 modules/gpucodec/src/precomp.cpp create mode 100644 modules/gpucodec/src/precomp.hpp create mode 100644 modules/gpucodec/src/thread.cpp rename modules/{gpu/src/thread_wrappers.h => gpucodec/src/thread.h} (61%) rename modules/{gpu => gpucodec}/src/video_decoder.cpp (97%) create mode 100644 modules/gpucodec/src/video_decoder.h rename modules/{gpu => gpucodec}/src/video_parser.cpp (98%) rename modules/{gpu => gpucodec}/src/video_parser.h (54%) rename modules/{gpu => gpucodec}/src/video_reader.cpp (89%) rename modules/{gpu => gpucodec}/src/video_writer.cpp (97%) create mode 100644 modules/gpucodec/test/test_main.cpp create mode 100644 modules/gpucodec/test/test_precomp.cpp create mode 100644 modules/gpucodec/test/test_precomp.hpp rename modules/{gpu => gpucodec}/test/test_video.cpp (75%) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index f3d101ab2..f1861fba7 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -28,6 +28,9 @@ if(CUDA_FOUND) if(WITH_NVCUVID) find_cuda_helper_libs(nvcuvid) + if(WIN32) + find_cuda_helper_libs(nvcuvenc) + endif() set(HAVE_NVCUVID 1) endif() diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index f01a23b84..6f2f1145e 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -39,19 +39,6 @@ if(HAVE_CUDA) ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda}) set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) - - if(WITH_NVCUVID) - set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY}) - endif() - - if(WIN32) - find_cuda_helper_libs(nvcuvenc) - set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY}) - endif() - - if(WITH_FFMPEG) - set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES}) - endif() else() set(lib_cuda "") set(cuda_objs "") diff --git a/modules/gpu/doc/video.rst b/modules/gpu/doc/video.rst index f96410037..bb7c8263e 100644 --- a/modules/gpu/doc/video.rst +++ b/modules/gpu/doc/video.rst @@ -687,453 +687,6 @@ Releases all inner buffer's memory. -gpu::VideoWriter_GPU ---------------------- -Video writer class. - -.. ocv:class:: gpu::VideoWriter_GPU - -The class uses H264 video codec. - -.. note:: Currently only Windows platform is supported. - - - -gpu::VideoWriter_GPU::VideoWriter_GPU -------------------------------------- -Constructors. - -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU() -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) - - :param fileName: Name of the output video file. Only AVI file format is supported. - - :param frameSize: Size of the input video frames. - - :param fps: Framerate of the created video stream. - - :param params: Encoder parameters. See :ocv:struct:`gpu::VideoWriter_GPU::EncoderParams` . - - :param format: Surface format of input frames ( ``SF_UYVY`` , ``SF_YUY2`` , ``SF_YV12`` , ``SF_NV12`` , ``SF_IYUV`` , ``SF_BGR`` or ``SF_GRAY``). BGR or gray frames will be converted to YV12 format before encoding, frames with other formats will be used as is. - - :param encoderCallback: Callbacks for video encoder. See :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . Use it if you want to work with raw video stream. - -The constructors initialize video writer. FFMPEG is used to write videos. User can implement own multiplexing with :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . - - - -gpu::VideoWriter_GPU::open --------------------------- -Initializes or reinitializes video writer. - -.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) -.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) - -The method opens video writer. Parameters are the same as in the constructor :ocv:func:`gpu::VideoWriter_GPU::VideoWriter_GPU` . The method throws :ocv:class:`Exception` if error occurs. - - - -gpu::VideoWriter_GPU::isOpened ------------------------------- -Returns true if video writer has been successfully initialized. - -.. ocv:function:: bool gpu::VideoWriter_GPU::isOpened() const - - - -gpu::VideoWriter_GPU::close ---------------------------- -Releases the video writer. - -.. ocv:function:: void gpu::VideoWriter_GPU::close() - - - -gpu::VideoWriter_GPU::write ---------------------------- -Writes the next video frame. - -.. ocv:function:: void gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame = false) - - :param image: The written frame. - - :param lastFrame: Indicates that it is end of stream. The parameter can be ignored. - -The method write the specified image to video file. The image must have the same size and the same surface format as has been specified when opening the video writer. - - - -gpu::VideoWriter_GPU::EncoderParams ------------------------------------ -.. ocv:struct:: gpu::VideoWriter_GPU::EncoderParams - -Different parameters for CUDA video encoder. :: - - struct EncoderParams - { - int P_Interval; // NVVE_P_INTERVAL, - int IDR_Period; // NVVE_IDR_PERIOD, - int DynamicGOP; // NVVE_DYNAMIC_GOP, - int RCType; // NVVE_RC_TYPE, - int AvgBitrate; // NVVE_AVG_BITRATE, - int PeakBitrate; // NVVE_PEAK_BITRATE, - int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, - int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, - int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, - int DeblockMode; // NVVE_DEBLOCK_MODE, - int ProfileLevel; // NVVE_PROFILE_LEVEL, - int ForceIntra; // NVVE_FORCE_INTRA, - int ForceIDR; // NVVE_FORCE_IDR, - int ClearStat; // NVVE_CLEAR_STAT, - int DIMode; // NVVE_SET_DEINTERLACE, - int Presets; // NVVE_PRESETS, - int DisableCabac; // NVVE_DISABLE_CABAC, - int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE - int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS - - EncoderParams(); - explicit EncoderParams(const String& configFile); - - void load(const String& configFile); - void save(const String& configFile) const; - }; - - - -gpu::VideoWriter_GPU::EncoderParams::EncoderParams --------------------------------------------------- -Constructors. - -.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams() -.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String& configFile) - - :param configFile: Config file name. - -Creates default parameters or reads parameters from config file. - - - -gpu::VideoWriter_GPU::EncoderParams::load ------------------------------------------ -Reads parameters from config file. - -.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::load(const String& configFile) - - :param configFile: Config file name. - - - -gpu::VideoWriter_GPU::EncoderParams::save ------------------------------------------ -Saves parameters to config file. - -.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) const - - :param configFile: Config file name. - - - -gpu::VideoWriter_GPU::EncoderCallBack -------------------------------------- -.. ocv:class:: gpu::VideoWriter_GPU::EncoderCallBack - -Callbacks for CUDA video encoder. :: - - class EncoderCallBack - { - public: - enum PicType - { - IFRAME = 1, - PFRAME = 2, - BFRAME = 3 - }; - - virtual ~EncoderCallBack() {} - - virtual unsigned char* acquireBitStream(int* bufferSize) = 0; - virtual void releaseBitStream(unsigned char* data, int size) = 0; - virtual void onBeginFrame(int frameNumber, PicType picType) = 0; - virtual void onEndFrame(int frameNumber, PicType picType) = 0; - }; - - - -gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream -------------------------------------------------------- -Callback function to signal the start of bitstream that is to be encoded. - -.. ocv:function:: virtual uchar* gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream(int* bufferSize) = 0 - -Callback must allocate buffer for CUDA encoder and return pointer to it and it's size. - - - -gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream -------------------------------------------------------- -Callback function to signal that the encoded bitstream is ready to be written to file. - -.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream(unsigned char* data, int size) = 0 - - - -gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame ---------------------------------------------------- -Callback function to signal that the encoding operation on the frame has started. - -.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame(int frameNumber, PicType picType) = 0 - - :param picType: Specify frame type (I-Frame, P-Frame or B-Frame). - - - -gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame -------------------------------------------------- -Callback function signals that the encoding operation on the frame has finished. - -.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame(int frameNumber, PicType picType) = 0 - - :param picType: Specify frame type (I-Frame, P-Frame or B-Frame). - - - -gpu::VideoReader_GPU --------------------- -Class for reading video from files. - -.. ocv:class:: gpu::VideoReader_GPU - -.. note:: Currently only Windows and Linux platforms are supported. - - - -gpu::VideoReader_GPU::Codec ---------------------------- - -Video codecs supported by :ocv:class:`gpu::VideoReader_GPU` . - -.. ocv:enum:: gpu::VideoReader_GPU::Codec - - .. ocv:emember:: MPEG1 = 0 - .. ocv:emember:: MPEG2 - .. ocv:emember:: MPEG4 - .. ocv:emember:: VC1 - .. ocv:emember:: H264 - .. ocv:emember:: JPEG - .. ocv:emember:: H264_SVC - .. ocv:emember:: H264_MVC - - .. ocv:emember:: Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')) - - Y,U,V (4:2:0) - - .. ocv:emember:: Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')) - - Y,V,U (4:2:0) - - .. ocv:emember:: Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')) - - Y,UV (4:2:0) - - .. ocv:emember:: Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')) - - YUYV/YUY2 (4:2:2) - - .. ocv:emember:: Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) - - UYVY (4:2:2) - - -gpu::VideoReader_GPU::ChromaFormat ----------------------------------- - -Chroma formats supported by :ocv:class:`gpu::VideoReader_GPU` . - -.. ocv:enum:: gpu::VideoReader_GPU::ChromaFormat - - .. ocv:emember:: Monochrome = 0 - .. ocv:emember:: YUV420 - .. ocv:emember:: YUV422 - .. ocv:emember:: YUV444 - - -gpu::VideoReader_GPU::FormatInfo --------------------------------- -.. ocv:struct:: gpu::VideoReader_GPU::FormatInfo - -Struct providing information about video file format. :: - - struct FormatInfo - { - Codec codec; - ChromaFormat chromaFormat; - int width; - int height; - }; - - -gpu::VideoReader_GPU::VideoReader_GPU -------------------------------------- -Constructors. - -.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU() -.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const String& filename) -.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr& source) - - :param filename: Name of the input video file. - - :param source: Video file parser implemented by user. - -The constructors initialize video reader. FFMPEG is used to read videos. User can implement own demultiplexing with :ocv:class:`gpu::VideoReader_GPU::VideoSource` . - - - -gpu::VideoReader_GPU::open --------------------------- -Initializes or reinitializes video reader. - -.. ocv:function:: void gpu::VideoReader_GPU::open(const String& filename) -.. ocv:function:: void gpu::VideoReader_GPU::open(const cv::Ptr& source) - -The method opens video reader. Parameters are the same as in the constructor :ocv:func:`gpu::VideoReader_GPU::VideoReader_GPU` . The method throws :ocv:class:`Exception` if error occurs. - - - -gpu::VideoReader_GPU::isOpened ------------------------------- -Returns true if video reader has been successfully initialized. - -.. ocv:function:: bool gpu::VideoReader_GPU::isOpened() const - - - -gpu::VideoReader_GPU::close ---------------------------- -Releases the video reader. - -.. ocv:function:: void gpu::VideoReader_GPU::close() - - - -gpu::VideoReader_GPU::read --------------------------- -Grabs, decodes and returns the next video frame. - -.. ocv:function:: bool gpu::VideoReader_GPU::read(GpuMat& image) - -If no frames has been grabbed (there are no more frames in video file), the methods return ``false`` . The method throws :ocv:class:`Exception` if error occurs. - - - -gpu::VideoReader_GPU::format ----------------------------- -Returns information about video file format. - -.. ocv:function:: FormatInfo gpu::VideoReader_GPU::format() const - -The method throws :ocv:class:`Exception` if video reader wasn't initialized. - - - -gpu::VideoReader_GPU::dumpFormat --------------------------------- -Dump information about video file format to specified stream. - -.. ocv:function:: void gpu::VideoReader_GPU::dumpFormat(std::ostream& st) - - :param st: Output stream. - -The method throws :ocv:class:`Exception` if video reader wasn't initialized. - - - -gpu::VideoReader_GPU::VideoSource ------------------------------------ -.. ocv:class:: gpu::VideoReader_GPU::VideoSource - -Interface for video demultiplexing. :: - - class VideoSource - { - public: - VideoSource(); - virtual ~VideoSource() {} - - virtual FormatInfo format() const = 0; - virtual void start() = 0; - virtual void stop() = 0; - virtual bool isStarted() const = 0; - virtual bool hasError() const = 0; - - protected: - bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream = false); - }; - -User can implement own demultiplexing by implementing this interface. - - - -gpu::VideoReader_GPU::VideoSource::format ------------------------------------------ -Returns information about video file format. - -.. ocv:function:: virtual FormatInfo gpu::VideoReader_GPU::VideoSource::format() const = 0 - - - -gpu::VideoReader_GPU::VideoSource::start ----------------------------------------- -Starts processing. - -.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::start() = 0 - -Implementation must create own thread with video processing and call periodic :ocv:func:`gpu::VideoReader_GPU::VideoSource::parseVideoData` . - - - -gpu::VideoReader_GPU::VideoSource::stop ---------------------------------------- -Stops processing. - -.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::stop() = 0 - - - -gpu::VideoReader_GPU::VideoSource::isStarted --------------------------------------------- -Returns ``true`` if processing was successfully started. - -.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::isStarted() const = 0 - - - -gpu::VideoReader_GPU::VideoSource::hasError -------------------------------------------- -Returns ``true`` if error occured during processing. - -.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::hasError() const = 0 - - - -gpu::VideoReader_GPU::VideoSource::parseVideoData -------------------------------------------------- -Parse next video frame. Implementation must call this method after new frame was grabbed. - -.. ocv:function:: bool gpu::VideoReader_GPU::VideoSource::parseVideoData(const uchar* data, size_t size, bool endOfStream = false) - - :param data: Pointer to frame data. Can be ``NULL`` if ``endOfStream`` if ``true`` . - - :param size: Size in bytes of current frame. - - :param endOfStream: Indicates that it is end of stream. - - - .. [Brox2004] T. Brox, A. Bruhn, N. Papenberg, J. Weickert. *High accuracy optical flow estimation based on a theory for warping*. ECCV 2004. .. [FGD2003] Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian. *Foreground Object Detection from Videos Containing Complex Background*. ACM MM2003 9p, 2003. .. [MOG2001] P. KadewTraKuPong and R. Bowden. *An improved adaptive background mixture model for real-time tracking with shadow detection*. Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001 diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp index 84de397dc..0b13fc01d 100644 --- a/modules/gpu/include/opencv2/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu.hpp @@ -2156,211 +2156,6 @@ private: GpuMat buf_; }; -////////////////////////////////// Video Encoding ////////////////////////////////// - -// Works only under Windows -// Supports olny H264 video codec and AVI files -class CV_EXPORTS VideoWriter_GPU -{ -public: - struct EncoderParams; - - // Callbacks for video encoder, use it if you want to work with raw video stream - class EncoderCallBack; - - enum SurfaceFormat - { - SF_UYVY = 0, - SF_YUY2, - SF_YV12, - SF_NV12, - SF_IYUV, - SF_BGR, - SF_GRAY = SF_BGR - }; - - VideoWriter_GPU(); - VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - ~VideoWriter_GPU(); - - // all methods throws cv::Exception if error occurs - void open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - void open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - void open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - void open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - - bool isOpened() const; - void close(); - - void write(const cv::gpu::GpuMat& image, bool lastFrame = false); - - struct CV_EXPORTS EncoderParams - { - int P_Interval; // NVVE_P_INTERVAL, - int IDR_Period; // NVVE_IDR_PERIOD, - int DynamicGOP; // NVVE_DYNAMIC_GOP, - int RCType; // NVVE_RC_TYPE, - int AvgBitrate; // NVVE_AVG_BITRATE, - int PeakBitrate; // NVVE_PEAK_BITRATE, - int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, - int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, - int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, - int DeblockMode; // NVVE_DEBLOCK_MODE, - int ProfileLevel; // NVVE_PROFILE_LEVEL, - int ForceIntra; // NVVE_FORCE_INTRA, - int ForceIDR; // NVVE_FORCE_IDR, - int ClearStat; // NVVE_CLEAR_STAT, - int DIMode; // NVVE_SET_DEINTERLACE, - int Presets; // NVVE_PRESETS, - int DisableCabac; // NVVE_DISABLE_CABAC, - int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE - int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS - - EncoderParams(); - explicit EncoderParams(const String& configFile); - - void load(const String& configFile); - void save(const String& configFile) const; - }; - - EncoderParams getParams() const; - - class CV_EXPORTS EncoderCallBack - { - public: - enum PicType - { - IFRAME = 1, - PFRAME = 2, - BFRAME = 3 - }; - - virtual ~EncoderCallBack() {} - - // callback function to signal the start of bitstream that is to be encoded - // must return pointer to buffer - virtual uchar* acquireBitStream(int* bufferSize) = 0; - - // callback function to signal that the encoded bitstream is ready to be written to file - virtual void releaseBitStream(unsigned char* data, int size) = 0; - - // callback function to signal that the encoding operation on the frame has started - virtual void onBeginFrame(int frameNumber, PicType picType) = 0; - - // callback function signals that the encoding operation on the frame has finished - virtual void onEndFrame(int frameNumber, PicType picType) = 0; - }; - -private: - VideoWriter_GPU(const VideoWriter_GPU&); - VideoWriter_GPU& operator=(const VideoWriter_GPU&); - - class Impl; - std::auto_ptr impl_; -}; - - -////////////////////////////////// Video Decoding ////////////////////////////////////////// - -namespace detail -{ - class FrameQueue; - class VideoParser; -} - -class CV_EXPORTS VideoReader_GPU -{ -public: - enum Codec - { - MPEG1 = 0, - MPEG2, - MPEG4, - VC1, - H264, - JPEG, - H264_SVC, - H264_MVC, - - Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0) - Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0) - Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0) - Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2) - Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')), // UYVY (4:2:2) - }; - - enum ChromaFormat - { - Monochrome=0, - YUV420, - YUV422, - YUV444, - }; - - struct FormatInfo - { - Codec codec; - ChromaFormat chromaFormat; - int width; - int height; - }; - - class VideoSource; - - VideoReader_GPU(); - explicit VideoReader_GPU(const String& filename); - explicit VideoReader_GPU(const cv::Ptr& source); - - ~VideoReader_GPU(); - - void open(const String& filename); - void open(const cv::Ptr& source); - bool isOpened() const; - - void close(); - - bool read(GpuMat& image); - - FormatInfo format() const; - void dumpFormat(std::ostream& st); - - class CV_EXPORTS VideoSource - { - public: - VideoSource() : frameQueue_(0), videoParser_(0) {} - virtual ~VideoSource() {} - - virtual FormatInfo format() const = 0; - virtual void start() = 0; - virtual void stop() = 0; - virtual bool isStarted() const = 0; - virtual bool hasError() const = 0; - - void setFrameQueue(detail::FrameQueue* frameQueue) { frameQueue_ = frameQueue; } - void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; } - - protected: - bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false); - - private: - VideoSource(const VideoSource&); - VideoSource& operator =(const VideoSource&); - - detail::FrameQueue* frameQueue_; - detail::VideoParser* videoParser_; - }; - -private: - VideoReader_GPU(const VideoReader_GPU&); - VideoReader_GPU& operator =(const VideoReader_GPU&); - - class Impl; - std::auto_ptr impl_; -}; - //! removes points (CV_32FC2, single row matrix) with zero mask value CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask); diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index c69b9606c..59efd2e4a 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -1005,103 +1005,3 @@ PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG, } #endif - -////////////////////////////////////////////////////// -// VideoReader - -#if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT - -PERF_TEST_P(Video, DISABLED_Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) -{ - declare.time(20); - - const string inputFile = perf::TestBase::getDataPath(GetParam()); - - if (PERF_RUN_GPU()) - { - cv::gpu::VideoReader_GPU d_reader(inputFile); - ASSERT_TRUE( d_reader.isOpened() ); - - cv::gpu::GpuMat frame; - - TEST_CYCLE_N(10) d_reader.read(frame); - - GPU_SANITY_CHECK(frame); - } - else - { - cv::VideoCapture reader(inputFile); - ASSERT_TRUE( reader.isOpened() ); - - cv::Mat frame; - - TEST_CYCLE_N(10) reader >> frame; - - CPU_SANITY_CHECK(frame); - } -} - -#endif - -////////////////////////////////////////////////////// -// VideoWriter - -#if defined(HAVE_NVCUVID) && defined(WIN32) - -PERF_TEST_P(Video, DISABLED_Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) -{ - declare.time(30); - - const string inputFile = perf::TestBase::getDataPath(GetParam()); - const string outputFile = cv::tempfile(".avi"); - - const double FPS = 25.0; - - cv::VideoCapture reader(inputFile); - ASSERT_TRUE( reader.isOpened() ); - - cv::Mat frame; - - if (PERF_RUN_GPU()) - { - cv::gpu::VideoWriter_GPU d_writer; - - cv::gpu::GpuMat d_frame; - - for (int i = 0; i < 10; ++i) - { - reader >> frame; - ASSERT_FALSE(frame.empty()); - - d_frame.upload(frame); - - if (!d_writer.isOpened()) - d_writer.open(outputFile, frame.size(), FPS); - - startTimer(); next(); - d_writer.write(d_frame); - stopTimer(); - } - } - else - { - cv::VideoWriter writer; - - for (int i = 0; i < 10; ++i) - { - reader >> frame; - ASSERT_FALSE(frame.empty()); - - if (!writer.isOpened()) - writer.open(outputFile, CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size()); - - startTimer(); next(); - writer.write(frame); - stopTimer(); - } - } - - SANITY_CHECK(frame); -} - -#endif diff --git a/modules/gpu/src/cuda/NV12ToARGB.cu b/modules/gpu/src/cuda/NV12ToARGB.cu deleted file mode 100644 index 09906613f..000000000 --- a/modules/gpu/src/cuda/NV12ToARGB.cu +++ /dev/null @@ -1,201 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -/* - * NV12ToARGB color space conversion CUDA kernel - * - * This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar) - * source and converts to output in ARGB format - */ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" - -namespace cv { namespace gpu { namespace cudev { - namespace video_decoding - { - __constant__ uint constAlpha = ((uint)0xff << 24); - - __constant__ float constHueColorSpaceMat[9]; - - void loadHueCSC(float hueCSC[9]) - { - cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) ); - } - - __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue) - { - float luma, chromaCb, chromaCr; - - // Prepare for hue adjustment - luma = (float)yuvi[0]; - chromaCb = (float)((int)yuvi[1] - 512.0f); - chromaCr = (float)((int)yuvi[2] - 512.0f); - - // Convert YUV To RGB with hue adjustment - *red = (luma * constHueColorSpaceMat[0]) + - (chromaCb * constHueColorSpaceMat[1]) + - (chromaCr * constHueColorSpaceMat[2]); - - *green = (luma * constHueColorSpaceMat[3]) + - (chromaCb * constHueColorSpaceMat[4]) + - (chromaCr * constHueColorSpaceMat[5]); - - *blue = (luma * constHueColorSpaceMat[6]) + - (chromaCb * constHueColorSpaceMat[7]) + - (chromaCr * constHueColorSpaceMat[8]); - } - - __device__ uint RGBAPACK_10bit(float red, float green, float blue, uint alpha) - { - uint ARGBpixel = 0; - - // Clamp final 10 bit results - red = ::fmin(::fmax(red, 0.0f), 1023.f); - green = ::fmin(::fmax(green, 0.0f), 1023.f); - blue = ::fmin(::fmax(blue, 0.0f), 1023.f); - - // Convert to 8 bit unsigned integers per color component - ARGBpixel = (((uint)blue >> 2) | - (((uint)green >> 2) << 8) | - (((uint)red >> 2) << 16) | - (uint)alpha); - - return ARGBpixel; - } - - // CUDA kernel for outputing the final ARGB output from NV12 - - #define COLOR_COMPONENT_BIT_SIZE 10 - #define COLOR_COMPONENT_MASK 0x3FF - - __global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch, - uint* dstImage, size_t nDestPitch, - uint width, uint height) - { - // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread - const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= width || y >= height) - return; - - // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. - // if we move to texture we could read 4 luminance values - - uint yuv101010Pel[2]; - - yuv101010Pel[0] = (srcImage[y * nSourcePitch + x ]) << 2; - yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2; - - const size_t chromaOffset = nSourcePitch * height; - - const int y_chroma = y >> 1; - - if (y & 1) // odd scanline ? - { - uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x ]; - uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1]; - - if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically - { - chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x ] + 1) >> 1; - chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1; - } - - yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - - yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - } - else - { - yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - - yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - } - - // this steps performs the color conversion - uint yuvi[6]; - float red[2], green[2], blue[2]; - - yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK ); - yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); - yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); - - yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK ); - yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); - yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); - - // YUV to RGB Transformation conversion - YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); - YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); - - // Clamp the results to RGBA - - const size_t dstImagePitch = nDestPitch >> 2; - - dstImage[y * dstImagePitch + x ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha); - dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha); - } - - void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz interopFrame, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y)); - - NV12ToARGB<<>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step, - interopFrame.cols, interopFrame.rows); - - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - } -}}} - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/rgb_to_yv12.cu b/modules/gpu/src/cuda/rgb_to_yv12.cu deleted file mode 100644 index 3e5664bab..000000000 --- a/modules/gpu/src/cuda/rgb_to_yv12.cu +++ /dev/null @@ -1,175 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace video_encoding - { - __device__ __forceinline__ void rgbtoy(const uchar b, const uchar g, const uchar r, uchar& y) - { - y = static_cast(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100); - } - - __device__ __forceinline__ void rgbtoyuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v) - { - rgbtoy(b, g, r, y); - u = static_cast(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100); - v = static_cast(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100); - } - - __global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst) - { - const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; - const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; - - if (x + 1 >= src.cols || y + 1 >= src.rows) - return; - - // get pointers to the data - const size_t planeSize = src.rows * dst.step; - PtrStepb y_plane(dst.data, dst.step); - PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2); - PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2); - - uchar pix; - uchar y_val, u_val, v_val; - - pix = src(y, x); - rgbtoy(pix, pix, pix, y_val); - y_plane(y, x) = y_val; - - pix = src(y, x + 1); - rgbtoy(pix, pix, pix, y_val); - y_plane(y, x + 1) = y_val; - - pix = src(y + 1, x); - rgbtoy(pix, pix, pix, y_val); - y_plane(y + 1, x) = y_val; - - pix = src(y + 1, x + 1); - rgbtoyuv(pix, pix, pix, y_val, u_val, v_val); - y_plane(y + 1, x + 1) = y_val; - u_plane(y / 2, x / 2) = u_val; - v_plane(y / 2, x / 2) = v_val; - } - - template - __global__ void BGR_to_YV12(const PtrStepSz src, PtrStepb dst) - { - const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; - const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; - - if (x + 1 >= src.cols || y + 1 >= src.rows) - return; - - // get pointers to the data - const size_t planeSize = src.rows * dst.step; - PtrStepb y_plane(dst.data, dst.step); - PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2); - PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2); - - T pix; - uchar y_val, u_val, v_val; - - pix = src(y, x); - rgbtoy(pix.z, pix.y, pix.x, y_val); - y_plane(y, x) = y_val; - - pix = src(y, x + 1); - rgbtoy(pix.z, pix.y, pix.x, y_val); - y_plane(y, x + 1) = y_val; - - pix = src(y + 1, x); - rgbtoy(pix.z, pix.y, pix.x, y_val); - y_plane(y + 1, x) = y_val; - - pix = src(y + 1, x + 1); - rgbtoyuv(pix.z, pix.y, pix.x, y_val, u_val, v_val); - y_plane(y + 1, x + 1) = y_val; - u_plane(y / 2, x / 2) = u_val; - v_plane(y / 2, x / 2) = v_val; - } - - void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst) - { - dim3 block(32, 8); - dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); - - Gray_to_YV12<<>>(src, dst); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - template - void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst) - { - typedef typename TypeVec::vec_type src_t; - - dim3 block(32, 8); - dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); - - BGR_to_YV12<<>>(static_cast< PtrStepSz >(src), dst); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst) - { - typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst); - - static const func_t funcs[] = - { - 0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4> - }; - - funcs[cn](src, dst); - } - } -}}} - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index cc25ab2f9..aacc43f99 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -80,20 +80,6 @@ #include #endif - #ifdef HAVE_NVCUVID - #include - - #ifdef WIN32 - #include - #undef small - #undef min - #undef max - #undef abs - - #include - #endif - #endif - #include "internal_shared.hpp" #include "opencv2/core/stream_accessor.hpp" diff --git a/modules/gpu/src/thread_wrappers.cpp b/modules/gpu/src/thread_wrappers.cpp deleted file mode 100644 index e8ee19e54..000000000 --- a/modules/gpu/src/thread_wrappers.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "thread_wrappers.h" - -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) - -#ifdef WIN32 - #define NOMINMAX - #include -#else - #include - #include -#endif - -#ifdef WIN32 - class cv::gpu::detail::CriticalSection::Impl - { - public: - Impl() - { - InitializeCriticalSection(&criticalSection_); - } - - ~Impl() - { - DeleteCriticalSection(&criticalSection_); - } - - void enter() - { - EnterCriticalSection(&criticalSection_); - } - - void leave() - { - LeaveCriticalSection(&criticalSection_); - } - - private: - CRITICAL_SECTION criticalSection_; - }; -#else - class cv::gpu::detail::CriticalSection::Impl - { - public: - Impl() - { - pthread_mutexattr_t mutex_attribute; - pthread_mutexattr_init(&mutex_attribute); - pthread_mutexattr_settype(&mutex_attribute, PTHREAD_MUTEX_RECURSIVE); - pthread_mutex_init(&mutex_, 0); - pthread_mutexattr_destroy(&mutex_attribute); - } - - ~Impl() - { - pthread_mutex_destroy(&mutex_); - } - - void enter() - { - pthread_mutex_lock(&mutex_); - } - - void leave() - { - pthread_mutex_unlock(&mutex_); - } - - private: - pthread_mutex_t mutex_; - }; -#endif - -cv::gpu::detail::CriticalSection::CriticalSection() : - impl_(new Impl) -{ -} - -cv::gpu::detail::CriticalSection::~CriticalSection() -{ -} - -void cv::gpu::detail::CriticalSection::enter() -{ - impl_->enter(); -} - -void cv::gpu::detail::CriticalSection::leave() -{ - impl_->leave(); -} - -#ifdef WIN32 - namespace - { - struct UserData - { - void (*func)(void* userData); - void* param; - }; - - DWORD WINAPI WinThreadFunction(LPVOID lpParam) - { - UserData* userData = static_cast(lpParam); - - userData->func(userData->param); - - return 0; - } - } - - class cv::gpu::detail::Thread::Impl - { - public: - Impl(void (*func)(void* userData), void* userData) - { - userData_.func = func; - userData_.param = userData; - - thread_ = CreateThread( - NULL, // default security attributes - 0, // use default stack size - WinThreadFunction, // thread function name - &userData_, // argument to thread function - 0, // use default creation flags - &threadId_); // returns the thread identifier - } - - ~Impl() - { - CloseHandle(thread_); - } - - void wait() - { - WaitForSingleObject(thread_, INFINITE); - } - - private: - UserData userData_; - HANDLE thread_; - DWORD threadId_; - }; -#else - namespace - { - struct UserData - { - void (*func)(void* userData); - void* param; - }; - - void* PThreadFunction(void* lpParam) - { - UserData* userData = static_cast(lpParam); - - userData->func(userData->param); - - return 0; - } - } - - class cv::gpu::detail::Thread::Impl - { - public: - Impl(void (*func)(void* userData), void* userData) - { - userData_.func = func; - userData_.param = userData; - - pthread_create(&thread_, NULL, PThreadFunction, &userData_); - } - - ~Impl() - { - pthread_detach(thread_); - } - - void wait() - { - pthread_join(thread_, NULL); - } - - private: - pthread_t thread_; - UserData userData_; - }; -#endif - -cv::gpu::detail::Thread::Thread(void (*func)(void* userData), void* userData) : - impl_(new Impl(func, userData)) -{ -} - -cv::gpu::detail::Thread::~Thread() -{ -} - -void cv::gpu::detail::Thread::wait() -{ - impl_->wait(); -} - -void cv::gpu::detail::Thread::sleep(int ms) -{ -#ifdef WIN32 - ::Sleep(ms); -#else - ::usleep(ms * 1000); -#endif -} - -#endif // HAVE_CUDA diff --git a/modules/gpu/src/video_decoder.h b/modules/gpu/src/video_decoder.h deleted file mode 100644 index 0c8f8e08f..000000000 --- a/modules/gpu/src/video_decoder.h +++ /dev/null @@ -1,116 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __VIDEO_DECODER_H__ -#define __VIDEO_DECODER_H__ - -#include "precomp.hpp" - -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) - -namespace cv { namespace gpu -{ - namespace detail - { - class VideoDecoder - { - public: - VideoDecoder(const VideoReader_GPU::FormatInfo& videoFormat, CUvideoctxlock lock) : lock_(lock), decoder_(0) - { - create(videoFormat); - } - - ~VideoDecoder() - { - release(); - } - - void create(const VideoReader_GPU::FormatInfo& videoFormat); - void release(); - - // Get the code-type currently used. - cudaVideoCodec codec() const { return createInfo_.CodecType; } - unsigned long maxDecodeSurfaces() const { return createInfo_.ulNumDecodeSurfaces; } - - unsigned long frameWidth() const { return createInfo_.ulWidth; } - unsigned long frameHeight() const { return createInfo_.ulHeight; } - - unsigned long targetWidth() const { return createInfo_.ulTargetWidth; } - unsigned long targetHeight() const { return createInfo_.ulTargetHeight; } - - cudaVideoChromaFormat chromaFormat() const { return createInfo_.ChromaFormat; } - - bool decodePicture(CUVIDPICPARAMS* picParams) - { - return cuvidDecodePicture(decoder_, picParams) == CUDA_SUCCESS; - } - - cv::gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams) - { - CUdeviceptr ptr; - unsigned int pitch; - - cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) ); - - return GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch); - } - - void unmapFrame(cv::gpu::GpuMat& frame) - { - cuSafeCall( cuvidUnmapVideoFrame(decoder_, (CUdeviceptr) frame.data) ); - frame.release(); - } - - private: - VideoDecoder(const VideoDecoder&); - VideoDecoder& operator =(const VideoDecoder&); - - CUvideoctxlock lock_; - CUVIDDECODECREATEINFO createInfo_; - CUvideodecoder decoder_; - }; - } -}} - -#endif // HAVE_CUDA - -#endif // __VIDEO_DECODER_H__ diff --git a/modules/gpucodec/CMakeLists.txt b/modules/gpucodec/CMakeLists.txt new file mode 100644 index 000000000..f03c201b3 --- /dev/null +++ b/modules/gpucodec/CMakeLists.txt @@ -0,0 +1,29 @@ +if(ANDROID OR IOS) + ocv_module_disable(gpucodec) +endif() + +set(the_description "GPU-accelerated Video Encoding/Decoding") + +ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations) + +ocv_add_module(gpucodec opencv_highgui) + +ocv_module_include_directories() +ocv_glob_module_sources() + +set(extra_libs ${HIGHGUI_LIBRARIES}) + +if(HAVE_NVCUVID) + list(APPEND extra_libs ${CUDA_nvcuvid_LIBRARY}) + + if(WIN32) + list(APPEND extra_libs ${CUDA_nvcuvenc_LIBRARY}) + endif() +endif() + +ocv_create_module(${extra_libs}) + +ocv_add_precompiled_headers(${the_module}) + +ocv_add_accuracy_tests() +ocv_add_perf_tests() diff --git a/modules/gpucodec/doc/gpucodec.rst b/modules/gpucodec/doc/gpucodec.rst new file mode 100644 index 000000000..b9f763f84 --- /dev/null +++ b/modules/gpucodec/doc/gpucodec.rst @@ -0,0 +1,9 @@ +************************************************* +gpucodec. GPU-accelerated Video Encoding/Decoding +************************************************* + +.. toctree:: + :maxdepth: 1 + + videodec + videoenc diff --git a/modules/gpucodec/doc/videodec.rst b/modules/gpucodec/doc/videodec.rst new file mode 100644 index 000000000..342203223 --- /dev/null +++ b/modules/gpucodec/doc/videodec.rst @@ -0,0 +1,234 @@ +Video Decoding +============== + +.. highlight:: cpp + + + +gpu::VideoReader_GPU +-------------------- +Video reader class. + +.. ocv:class:: gpu::VideoReader_GPU + + + +gpu::VideoReader_GPU::Codec +--------------------------- + +Video codecs supported by :ocv:class:`gpu::VideoReader_GPU` . + +.. ocv:enum:: gpu::VideoReader_GPU::Codec + + .. ocv:emember:: MPEG1 = 0 + .. ocv:emember:: MPEG2 + .. ocv:emember:: MPEG4 + .. ocv:emember:: VC1 + .. ocv:emember:: H264 + .. ocv:emember:: JPEG + .. ocv:emember:: H264_SVC + .. ocv:emember:: H264_MVC + + .. ocv:emember:: Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')) + + Y,U,V (4:2:0) + + .. ocv:emember:: Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')) + + Y,V,U (4:2:0) + + .. ocv:emember:: Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')) + + Y,UV (4:2:0) + + .. ocv:emember:: Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')) + + YUYV/YUY2 (4:2:2) + + .. ocv:emember:: Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) + + UYVY (4:2:2) + + +gpu::VideoReader_GPU::ChromaFormat +---------------------------------- + +Chroma formats supported by :ocv:class:`gpu::VideoReader_GPU` . + +.. ocv:enum:: gpu::VideoReader_GPU::ChromaFormat + + .. ocv:emember:: Monochrome = 0 + .. ocv:emember:: YUV420 + .. ocv:emember:: YUV422 + .. ocv:emember:: YUV444 + + +gpu::VideoReader_GPU::FormatInfo +-------------------------------- +.. ocv:struct:: gpu::VideoReader_GPU::FormatInfo + +Struct providing information about video file format. :: + + struct FormatInfo + { + Codec codec; + ChromaFormat chromaFormat; + int width; + int height; + }; + + +gpu::VideoReader_GPU::VideoReader_GPU +------------------------------------- +Constructors. + +.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU() +.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const String& filename) +.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr& source) + + :param filename: Name of the input video file. + + :param source: Video file parser implemented by user. + +The constructors initialize video reader. FFMPEG is used to read videos. User can implement own demultiplexing with :ocv:class:`gpu::VideoReader_GPU::VideoSource` . + + + +gpu::VideoReader_GPU::open +-------------------------- +Initializes or reinitializes video reader. + +.. ocv:function:: void gpu::VideoReader_GPU::open(const String& filename) +.. ocv:function:: void gpu::VideoReader_GPU::open(const cv::Ptr& source) + +The method opens video reader. Parameters are the same as in the constructor :ocv:func:`gpu::VideoReader_GPU::VideoReader_GPU` . The method throws :ocv:class:`Exception` if error occurs. + + + +gpu::VideoReader_GPU::isOpened +------------------------------ +Returns true if video reader has been successfully initialized. + +.. ocv:function:: bool gpu::VideoReader_GPU::isOpened() const + + + +gpu::VideoReader_GPU::close +--------------------------- +Releases the video reader. + +.. ocv:function:: void gpu::VideoReader_GPU::close() + + + +gpu::VideoReader_GPU::read +-------------------------- +Grabs, decodes and returns the next video frame. + +.. ocv:function:: bool gpu::VideoReader_GPU::read(GpuMat& image) + +If no frames has been grabbed (there are no more frames in video file), the methods return ``false`` . The method throws :ocv:class:`Exception` if error occurs. + + + +gpu::VideoReader_GPU::format +---------------------------- +Returns information about video file format. + +.. ocv:function:: FormatInfo gpu::VideoReader_GPU::format() const + +The method throws :ocv:class:`Exception` if video reader wasn't initialized. + + + +gpu::VideoReader_GPU::dumpFormat +-------------------------------- +Dump information about video file format to specified stream. + +.. ocv:function:: void gpu::VideoReader_GPU::dumpFormat(std::ostream& st) + + :param st: Output stream. + +The method throws :ocv:class:`Exception` if video reader wasn't initialized. + + + +gpu::VideoReader_GPU::VideoSource +----------------------------------- +.. ocv:class:: gpu::VideoReader_GPU::VideoSource + +Interface for video demultiplexing. :: + + class VideoSource + { + public: + VideoSource(); + virtual ~VideoSource() {} + + virtual FormatInfo format() const = 0; + virtual void start() = 0; + virtual void stop() = 0; + virtual bool isStarted() const = 0; + virtual bool hasError() const = 0; + + protected: + bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream = false); + }; + +User can implement own demultiplexing by implementing this interface. + + + +gpu::VideoReader_GPU::VideoSource::format +----------------------------------------- +Returns information about video file format. + +.. ocv:function:: virtual FormatInfo gpu::VideoReader_GPU::VideoSource::format() const = 0 + + + +gpu::VideoReader_GPU::VideoSource::start +---------------------------------------- +Starts processing. + +.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::start() = 0 + +Implementation must create own thread with video processing and call periodic :ocv:func:`gpu::VideoReader_GPU::VideoSource::parseVideoData` . + + + +gpu::VideoReader_GPU::VideoSource::stop +--------------------------------------- +Stops processing. + +.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::stop() = 0 + + + +gpu::VideoReader_GPU::VideoSource::isStarted +-------------------------------------------- +Returns ``true`` if processing was successfully started. + +.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::isStarted() const = 0 + + + +gpu::VideoReader_GPU::VideoSource::hasError +------------------------------------------- +Returns ``true`` if error occured during processing. + +.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::hasError() const = 0 + + + +gpu::VideoReader_GPU::VideoSource::parseVideoData +------------------------------------------------- +Parse next video frame. Implementation must call this method after new frame was grabbed. + +.. ocv:function:: bool gpu::VideoReader_GPU::VideoSource::parseVideoData(const uchar* data, size_t size, bool endOfStream = false) + + :param data: Pointer to frame data. Can be ``NULL`` if ``endOfStream`` if ``true`` . + + :param size: Size in bytes of current frame. + + :param endOfStream: Indicates that it is end of stream. diff --git a/modules/gpucodec/doc/videoenc.rst b/modules/gpucodec/doc/videoenc.rst new file mode 100644 index 000000000..ec26e27ef --- /dev/null +++ b/modules/gpucodec/doc/videoenc.rst @@ -0,0 +1,219 @@ +Video Encoding +============== + +.. highlight:: cpp + + + +gpu::VideoWriter_GPU +--------------------- +Video writer class. + +.. ocv:class:: gpu::VideoWriter_GPU + +The class uses H264 video codec. + +.. note:: Currently only Windows platform is supported. + + + +gpu::VideoWriter_GPU::VideoWriter_GPU +------------------------------------- +Constructors. + +.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU() +.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) +.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) +.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) +.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) + + :param fileName: Name of the output video file. Only AVI file format is supported. + + :param frameSize: Size of the input video frames. + + :param fps: Framerate of the created video stream. + + :param params: Encoder parameters. See :ocv:struct:`gpu::VideoWriter_GPU::EncoderParams` . + + :param format: Surface format of input frames ( ``SF_UYVY`` , ``SF_YUY2`` , ``SF_YV12`` , ``SF_NV12`` , ``SF_IYUV`` , ``SF_BGR`` or ``SF_GRAY``). BGR or gray frames will be converted to YV12 format before encoding, frames with other formats will be used as is. + + :param encoderCallback: Callbacks for video encoder. See :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . Use it if you want to work with raw video stream. + +The constructors initialize video writer. FFMPEG is used to write videos. User can implement own multiplexing with :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . + + + +gpu::VideoWriter_GPU::open +-------------------------- +Initializes or reinitializes video writer. + +.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) +.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) +.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) +.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) + +The method opens video writer. Parameters are the same as in the constructor :ocv:func:`gpu::VideoWriter_GPU::VideoWriter_GPU` . The method throws :ocv:class:`Exception` if error occurs. + + + +gpu::VideoWriter_GPU::isOpened +------------------------------ +Returns true if video writer has been successfully initialized. + +.. ocv:function:: bool gpu::VideoWriter_GPU::isOpened() const + + + +gpu::VideoWriter_GPU::close +--------------------------- +Releases the video writer. + +.. ocv:function:: void gpu::VideoWriter_GPU::close() + + + +gpu::VideoWriter_GPU::write +--------------------------- +Writes the next video frame. + +.. ocv:function:: void gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame = false) + + :param image: The written frame. + + :param lastFrame: Indicates that it is end of stream. The parameter can be ignored. + +The method write the specified image to video file. The image must have the same size and the same surface format as has been specified when opening the video writer. + + + +gpu::VideoWriter_GPU::EncoderParams +----------------------------------- +.. ocv:struct:: gpu::VideoWriter_GPU::EncoderParams + +Different parameters for CUDA video encoder. :: + + struct EncoderParams + { + int P_Interval; // NVVE_P_INTERVAL, + int IDR_Period; // NVVE_IDR_PERIOD, + int DynamicGOP; // NVVE_DYNAMIC_GOP, + int RCType; // NVVE_RC_TYPE, + int AvgBitrate; // NVVE_AVG_BITRATE, + int PeakBitrate; // NVVE_PEAK_BITRATE, + int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, + int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, + int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, + int DeblockMode; // NVVE_DEBLOCK_MODE, + int ProfileLevel; // NVVE_PROFILE_LEVEL, + int ForceIntra; // NVVE_FORCE_INTRA, + int ForceIDR; // NVVE_FORCE_IDR, + int ClearStat; // NVVE_CLEAR_STAT, + int DIMode; // NVVE_SET_DEINTERLACE, + int Presets; // NVVE_PRESETS, + int DisableCabac; // NVVE_DISABLE_CABAC, + int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE + int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS + + EncoderParams(); + explicit EncoderParams(const String& configFile); + + void load(const String& configFile); + void save(const String& configFile) const; + }; + + + +gpu::VideoWriter_GPU::EncoderParams::EncoderParams +-------------------------------------------------- +Constructors. + +.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams() +.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String& configFile) + + :param configFile: Config file name. + +Creates default parameters or reads parameters from config file. + + + +gpu::VideoWriter_GPU::EncoderParams::load +----------------------------------------- +Reads parameters from config file. + +.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::load(const String& configFile) + + :param configFile: Config file name. + + + +gpu::VideoWriter_GPU::EncoderParams::save +----------------------------------------- +Saves parameters to config file. + +.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) const + + :param configFile: Config file name. + + + +gpu::VideoWriter_GPU::EncoderCallBack +------------------------------------- +.. ocv:class:: gpu::VideoWriter_GPU::EncoderCallBack + +Callbacks for CUDA video encoder. :: + + class EncoderCallBack + { + public: + enum PicType + { + IFRAME = 1, + PFRAME = 2, + BFRAME = 3 + }; + + virtual ~EncoderCallBack() {} + + virtual unsigned char* acquireBitStream(int* bufferSize) = 0; + virtual void releaseBitStream(unsigned char* data, int size) = 0; + virtual void onBeginFrame(int frameNumber, PicType picType) = 0; + virtual void onEndFrame(int frameNumber, PicType picType) = 0; + }; + + + +gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream +------------------------------------------------------- +Callback function to signal the start of bitstream that is to be encoded. + +.. ocv:function:: virtual uchar* gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream(int* bufferSize) = 0 + +Callback must allocate buffer for CUDA encoder and return pointer to it and it's size. + + + +gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream +------------------------------------------------------- +Callback function to signal that the encoded bitstream is ready to be written to file. + +.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream(unsigned char* data, int size) = 0 + + + +gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame +--------------------------------------------------- +Callback function to signal that the encoding operation on the frame has started. + +.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame(int frameNumber, PicType picType) = 0 + + :param picType: Specify frame type (I-Frame, P-Frame or B-Frame). + + + +gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame +------------------------------------------------- +Callback function signals that the encoding operation on the frame has finished. + +.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame(int frameNumber, PicType picType) = 0 + + :param picType: Specify frame type (I-Frame, P-Frame or B-Frame). diff --git a/modules/gpucodec/include/opencv2/gpucodec.hpp b/modules/gpucodec/include/opencv2/gpucodec.hpp new file mode 100644 index 000000000..ac9c40047 --- /dev/null +++ b/modules/gpucodec/include/opencv2/gpucodec.hpp @@ -0,0 +1,265 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPUCODEC_HPP__ +#define __OPENCV_GPUCODEC_HPP__ + +#ifndef __cplusplus +# error gpucodec.hpp header must be compiled as C++ +#endif + +#include + +#include "opencv2/core/gpumat.hpp" + +namespace cv { namespace gpu { + +////////////////////////////////// Video Encoding ////////////////////////////////// + +// Works only under Windows +// Supports olny H264 video codec and AVI files +class CV_EXPORTS VideoWriter_GPU +{ +public: + struct EncoderParams; + + // Callbacks for video encoder, use it if you want to work with raw video stream + class EncoderCallBack; + + enum SurfaceFormat + { + SF_UYVY = 0, + SF_YUY2, + SF_YV12, + SF_NV12, + SF_IYUV, + SF_BGR, + SF_GRAY = SF_BGR + }; + + VideoWriter_GPU(); + VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); + VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); + VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + ~VideoWriter_GPU(); + + // all methods throws cv::Exception if error occurs + void open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); + void open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + void open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); + void open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + + bool isOpened() const; + void close(); + + void write(const cv::gpu::GpuMat& image, bool lastFrame = false); + + struct CV_EXPORTS EncoderParams + { + int P_Interval; // NVVE_P_INTERVAL, + int IDR_Period; // NVVE_IDR_PERIOD, + int DynamicGOP; // NVVE_DYNAMIC_GOP, + int RCType; // NVVE_RC_TYPE, + int AvgBitrate; // NVVE_AVG_BITRATE, + int PeakBitrate; // NVVE_PEAK_BITRATE, + int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, + int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, + int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, + int DeblockMode; // NVVE_DEBLOCK_MODE, + int ProfileLevel; // NVVE_PROFILE_LEVEL, + int ForceIntra; // NVVE_FORCE_INTRA, + int ForceIDR; // NVVE_FORCE_IDR, + int ClearStat; // NVVE_CLEAR_STAT, + int DIMode; // NVVE_SET_DEINTERLACE, + int Presets; // NVVE_PRESETS, + int DisableCabac; // NVVE_DISABLE_CABAC, + int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE + int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS + + EncoderParams(); + explicit EncoderParams(const String& configFile); + + void load(const String& configFile); + void save(const String& configFile) const; + }; + + EncoderParams getParams() const; + + class CV_EXPORTS EncoderCallBack + { + public: + enum PicType + { + IFRAME = 1, + PFRAME = 2, + BFRAME = 3 + }; + + virtual ~EncoderCallBack() {} + + // callback function to signal the start of bitstream that is to be encoded + // must return pointer to buffer + virtual uchar* acquireBitStream(int* bufferSize) = 0; + + // callback function to signal that the encoded bitstream is ready to be written to file + virtual void releaseBitStream(unsigned char* data, int size) = 0; + + // callback function to signal that the encoding operation on the frame has started + virtual void onBeginFrame(int frameNumber, PicType picType) = 0; + + // callback function signals that the encoding operation on the frame has finished + virtual void onEndFrame(int frameNumber, PicType picType) = 0; + }; + + class Impl; + +private: + cv::Ptr impl_; +}; + +////////////////////////////////// Video Decoding ////////////////////////////////////////// + +namespace detail +{ + class FrameQueue; + class VideoParser; +} + +class CV_EXPORTS VideoReader_GPU +{ +public: + enum Codec + { + MPEG1 = 0, + MPEG2, + MPEG4, + VC1, + H264, + JPEG, + H264_SVC, + H264_MVC, + + Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0) + Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0) + Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0) + Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2) + Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')), // UYVY (4:2:2) + }; + + enum ChromaFormat + { + Monochrome=0, + YUV420, + YUV422, + YUV444, + }; + + struct FormatInfo + { + Codec codec; + ChromaFormat chromaFormat; + int width; + int height; + }; + + class VideoSource; + + VideoReader_GPU(); + explicit VideoReader_GPU(const String& filename); + explicit VideoReader_GPU(const cv::Ptr& source); + + ~VideoReader_GPU(); + + void open(const String& filename); + void open(const cv::Ptr& source); + bool isOpened() const; + + void close(); + + bool read(GpuMat& image); + + FormatInfo format() const; + void dumpFormat(std::ostream& st); + + class CV_EXPORTS VideoSource + { + public: + VideoSource() : frameQueue_(0), videoParser_(0) {} + virtual ~VideoSource() {} + + virtual FormatInfo format() const = 0; + virtual void start() = 0; + virtual void stop() = 0; + virtual bool isStarted() const = 0; + virtual bool hasError() const = 0; + + void setFrameQueue(detail::FrameQueue* frameQueue) { frameQueue_ = frameQueue; } + void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; } + + protected: + bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false); + + private: + VideoSource(const VideoSource&); + VideoSource& operator =(const VideoSource&); + + detail::FrameQueue* frameQueue_; + detail::VideoParser* videoParser_; + }; + + class Impl; + +private: + cv::Ptr impl_; +}; + +}} // namespace cv { namespace gpu { + +namespace cv { + +template <> CV_EXPORTS void Ptr::delete_obj(); +template <> CV_EXPORTS void Ptr::delete_obj(); + +} + +#endif /* __OPENCV_GPUCODEC_HPP__ */ diff --git a/modules/gpucodec/perf/perf_main.cpp b/modules/gpucodec/perf/perf_main.cpp new file mode 100644 index 000000000..2f4110b87 --- /dev/null +++ b/modules/gpucodec/perf/perf_main.cpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +using namespace perf; + +CV_PERF_TEST_MAIN(gpucodec, printCudaInfo()) diff --git a/modules/gpucodec/perf/perf_precomp.cpp b/modules/gpucodec/perf/perf_precomp.cpp new file mode 100644 index 000000000..81f16e8f1 --- /dev/null +++ b/modules/gpucodec/perf/perf_precomp.cpp @@ -0,0 +1,43 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" diff --git a/modules/gpucodec/perf/perf_precomp.hpp b/modules/gpucodec/perf/perf_precomp.hpp new file mode 100644 index 000000000..421fa5763 --- /dev/null +++ b/modules/gpucodec/perf/perf_precomp.hpp @@ -0,0 +1,64 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/ts/gpu_perf.hpp" + +#include "opencv2/gpucodec.hpp" +#include "opencv2/highgui.hpp" + +#ifdef GTEST_CREATE_SHARED_LIBRARY +#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined +#endif + +#endif diff --git a/modules/gpucodec/perf/perf_video.cpp b/modules/gpucodec/perf/perf_video.cpp new file mode 100644 index 000000000..8f5e1700e --- /dev/null +++ b/modules/gpucodec/perf/perf_video.cpp @@ -0,0 +1,162 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" +#include "opencv2/highgui/highgui_c.h" + +using namespace std; +using namespace testing; +using namespace perf; + +#if defined(HAVE_XINE) || \ + defined(HAVE_GSTREAMER) || \ + defined(HAVE_QUICKTIME) || \ + defined(HAVE_AVFOUNDATION) || \ + defined(HAVE_FFMPEG) || \ + defined(WIN32) /* assume that we have ffmpeg */ + +# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 +#else +# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 +#endif + +DEF_PARAM_TEST_1(FileName, string); + +////////////////////////////////////////////////////// +// VideoReader + +#if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT + +PERF_TEST_P(FileName, VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) +{ + declare.time(20); + + const string inputFile = perf::TestBase::getDataPath(GetParam()); + + if (PERF_RUN_GPU()) + { + cv::gpu::VideoReader_GPU d_reader(inputFile); + ASSERT_TRUE( d_reader.isOpened() ); + + cv::gpu::GpuMat frame; + + TEST_CYCLE_N(10) d_reader.read(frame); + + GPU_SANITY_CHECK(frame); + } + else + { + cv::VideoCapture reader(inputFile); + ASSERT_TRUE( reader.isOpened() ); + + cv::Mat frame; + + TEST_CYCLE_N(10) reader >> frame; + + CPU_SANITY_CHECK(frame); + } +} + +#endif + +////////////////////////////////////////////////////// +// VideoWriter + +#if defined(HAVE_NVCUVID) && defined(WIN32) + +PERF_TEST_P(FileName, VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) +{ + declare.time(30); + + const string inputFile = perf::TestBase::getDataPath(GetParam()); + const string outputFile = cv::tempfile(".avi"); + + const double FPS = 25.0; + + cv::VideoCapture reader(inputFile); + ASSERT_TRUE( reader.isOpened() ); + + cv::Mat frame; + + if (PERF_RUN_GPU()) + { + cv::gpu::VideoWriter_GPU d_writer; + + cv::gpu::GpuMat d_frame; + + for (int i = 0; i < 10; ++i) + { + reader >> frame; + ASSERT_FALSE(frame.empty()); + + d_frame.upload(frame); + + if (!d_writer.isOpened()) + d_writer.open(outputFile, frame.size(), FPS); + + startTimer(); next(); + d_writer.write(d_frame); + stopTimer(); + } + } + else + { + cv::VideoWriter writer; + + for (int i = 0; i < 10; ++i) + { + reader >> frame; + ASSERT_FALSE(frame.empty()); + + if (!writer.isOpened()) + writer.open(outputFile, CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size()); + + startTimer(); next(); + writer.write(frame); + stopTimer(); + } + } + + SANITY_CHECK(frame); +} + +#endif diff --git a/modules/gpucodec/src/cuda/nv12_to_rgb.cu b/modules/gpucodec/src/cuda/nv12_to_rgb.cu new file mode 100644 index 000000000..536ba2715 --- /dev/null +++ b/modules/gpucodec/src/cuda/nv12_to_rgb.cu @@ -0,0 +1,193 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* + * NV12ToARGB color space conversion CUDA kernel + * + * This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar) + * source and converts to output in ARGB format + */ + +#include "opencv2/core/cuda/common.hpp" + +namespace cv { namespace gpu { namespace cudev +{ + __constant__ float constHueColorSpaceMat[9]; + + void loadHueCSC(float hueCSC[9]) + { + cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) ); + } + + __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue) + { + float luma, chromaCb, chromaCr; + + // Prepare for hue adjustment + luma = (float)yuvi[0]; + chromaCb = (float)((int)yuvi[1] - 512.0f); + chromaCr = (float)((int)yuvi[2] - 512.0f); + + // Convert YUV To RGB with hue adjustment + *red = (luma * constHueColorSpaceMat[0]) + + (chromaCb * constHueColorSpaceMat[1]) + + (chromaCr * constHueColorSpaceMat[2]); + + *green = (luma * constHueColorSpaceMat[3]) + + (chromaCb * constHueColorSpaceMat[4]) + + (chromaCr * constHueColorSpaceMat[5]); + + *blue = (luma * constHueColorSpaceMat[6]) + + (chromaCb * constHueColorSpaceMat[7]) + + (chromaCr * constHueColorSpaceMat[8]); + } + + __device__ uint RGBA_pack_10bit(float red, float green, float blue, uint alpha) + { + uint ARGBpixel = 0; + + // Clamp final 10 bit results + red = ::fmin(::fmax(red, 0.0f), 1023.f); + green = ::fmin(::fmax(green, 0.0f), 1023.f); + blue = ::fmin(::fmax(blue, 0.0f), 1023.f); + + // Convert to 8 bit unsigned integers per color component + ARGBpixel = (((uint)blue >> 2) | + (((uint)green >> 2) << 8) | + (((uint)red >> 2) << 16) | + (uint)alpha); + + return ARGBpixel; + } + + // CUDA kernel for outputing the final ARGB output from NV12 + + #define COLOR_COMPONENT_BIT_SIZE 10 + #define COLOR_COMPONENT_MASK 0x3FF + + __global__ void NV12_to_RGB(uchar* srcImage, size_t nSourcePitch, + uint* dstImage, size_t nDestPitch, + uint width, uint height) + { + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread + const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= width || y >= height) + return; + + // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. + // if we move to texture we could read 4 luminance values + + uint yuv101010Pel[2]; + + yuv101010Pel[0] = (srcImage[y * nSourcePitch + x ]) << 2; + yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2; + + const size_t chromaOffset = nSourcePitch * height; + + const int y_chroma = y >> 1; + + if (y & 1) // odd scanline ? + { + uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x ]; + uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1]; + + if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically + { + chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x ] + 1) >> 1; + chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1; + } + + yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + else + { + yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + + // this steps performs the color conversion + uint yuvi[6]; + float red[2], green[2], blue[2]; + + yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK ); + yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK ); + yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + // YUV to RGB Transformation conversion + YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]); + YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]); + + // Clamp the results to RGBA + + const size_t dstImagePitch = nDestPitch >> 2; + + dstImage[y * dstImagePitch + x ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24)); + dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24)); + } + + void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz interopFrame, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y)); + + NV12_to_RGB<<>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step, + interopFrame.cols, interopFrame.rows); + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } +}}} diff --git a/modules/gpucodec/src/cuda/rgb_to_yv12.cu b/modules/gpucodec/src/cuda/rgb_to_yv12.cu new file mode 100644 index 000000000..8787b1e68 --- /dev/null +++ b/modules/gpucodec/src/cuda/rgb_to_yv12.cu @@ -0,0 +1,170 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/vec_traits.hpp" + +namespace cv { namespace gpu { namespace cudev +{ + __device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y) + { + y = static_cast(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100); + } + + __device__ __forceinline__ void rgb_to_yuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v) + { + rgb_to_y(b, g, r, y); + u = static_cast(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100); + v = static_cast(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100); + } + + __global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst) + { + const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; + const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; + + if (x + 1 >= src.cols || y + 1 >= src.rows) + return; + + // get pointers to the data + const size_t planeSize = src.rows * dst.step; + PtrStepb y_plane(dst.data, dst.step); + PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2); + PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2); + + uchar pix; + uchar y_val, u_val, v_val; + + pix = src(y, x); + rgb_to_y(pix, pix, pix, y_val); + y_plane(y, x) = y_val; + + pix = src(y, x + 1); + rgb_to_y(pix, pix, pix, y_val); + y_plane(y, x + 1) = y_val; + + pix = src(y + 1, x); + rgb_to_y(pix, pix, pix, y_val); + y_plane(y + 1, x) = y_val; + + pix = src(y + 1, x + 1); + rgb_to_yuv(pix, pix, pix, y_val, u_val, v_val); + y_plane(y + 1, x + 1) = y_val; + u_plane(y / 2, x / 2) = u_val; + v_plane(y / 2, x / 2) = v_val; + } + + template + __global__ void RGB_to_YV12(const PtrStepSz src, PtrStepb dst) + { + const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; + const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; + + if (x + 1 >= src.cols || y + 1 >= src.rows) + return; + + // get pointers to the data + const size_t planeSize = src.rows * dst.step; + PtrStepb y_plane(dst.data, dst.step); + PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2); + PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2); + + T pix; + uchar y_val, u_val, v_val; + + pix = src(y, x); + rgb_to_y(pix.z, pix.y, pix.x, y_val); + y_plane(y, x) = y_val; + + pix = src(y, x + 1); + rgb_to_y(pix.z, pix.y, pix.x, y_val); + y_plane(y, x + 1) = y_val; + + pix = src(y + 1, x); + rgb_to_y(pix.z, pix.y, pix.x, y_val); + y_plane(y + 1, x) = y_val; + + pix = src(y + 1, x + 1); + rgb_to_yuv(pix.z, pix.y, pix.x, y_val, u_val, v_val); + y_plane(y + 1, x + 1) = y_val; + u_plane(y / 2, x / 2) = u_val; + v_plane(y / 2, x / 2) = v_val; + } + + void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); + + Gray_to_YV12<<>>(src, dst); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + template + void RGB_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream) + { + typedef typename TypeVec::vec_type src_t; + + dim3 block(32, 8); + dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); + + RGB_to_YV12<<>>(static_cast< PtrStepSz >(src), dst); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream) + { + typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream); + + static const func_t funcs[] = + { + 0, Gray_to_YV12_caller, 0, RGB_to_YV12_caller<3>, RGB_to_YV12_caller<4> + }; + + funcs[cn](src, dst, stream); + } +}}} diff --git a/modules/gpu/src/cuvid_video_source.cpp b/modules/gpucodec/src/cuvid_video_source.cpp similarity index 96% rename from modules/gpu/src/cuvid_video_source.cpp rename to modules/gpucodec/src/cuvid_video_source.cpp index b725a707b..73d6d2426 100644 --- a/modules/gpu/src/cuvid_video_source.cpp +++ b/modules/gpucodec/src/cuvid_video_source.cpp @@ -40,9 +40,9 @@ // //M*/ -#include "cuvid_video_source.h" +#include "precomp.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#ifdef HAVE_NVCUVID cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const String& fname) { @@ -69,6 +69,11 @@ cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const String& fname) format_.height = vidfmt.coded_height; } +cv::gpu::detail::CuvidVideoSource::~CuvidVideoSource() +{ + cuvidDestroyVideoSource(videoSource_); +} + cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const { return format_; @@ -101,4 +106,4 @@ int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, C return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0); } -#endif // defined(HAVE_CUDA) && !defined(__APPLE__) +#endif // HAVE_NVCUVID diff --git a/modules/gpu/src/cuvid_video_source.h b/modules/gpucodec/src/cuvid_video_source.h similarity index 61% rename from modules/gpu/src/cuvid_video_source.h rename to modules/gpucodec/src/cuvid_video_source.h index 1bf484902..a0b78222d 100644 --- a/modules/gpu/src/cuvid_video_source.h +++ b/modules/gpucodec/src/cuvid_video_source.h @@ -43,48 +43,44 @@ #ifndef __CUVUD_VIDEO_SOURCE_H__ #define __CUVUD_VIDEO_SOURCE_H__ -#include "precomp.hpp" +#include "opencv2/core/gpu_private.hpp" +#include "opencv2/gpucodec.hpp" +#include "thread.h" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#include -namespace cv { namespace gpu +namespace cv { namespace gpu { namespace detail { - namespace detail - { - class CuvidVideoSource : public VideoReader_GPU::VideoSource - { - public: - explicit CuvidVideoSource(const String& fname); - ~CuvidVideoSource() { cuvidDestroyVideoSource(videoSource_); } - VideoReader_GPU::FormatInfo format() const; - void start(); - void stop(); - bool isStarted() const; - bool hasError() const; +class CuvidVideoSource : public VideoReader_GPU::VideoSource +{ +public: + explicit CuvidVideoSource(const String& fname); + ~CuvidVideoSource(); - private: - CuvidVideoSource(const CuvidVideoSource&); - CuvidVideoSource& operator =(const CuvidVideoSource&); + VideoReader_GPU::FormatInfo format() const; + void start(); + void stop(); + bool isStarted() const; + bool hasError() const; - // Callback for handling packages of demuxed video data. - // - // Parameters: - // pUserData - Pointer to user data. We must pass a pointer to a - // VideoSourceData struct here, that contains a valid CUvideoparser - // and FrameQueue. - // pPacket - video-source data packet. - // - // NOTE: called from a different thread that doesn't not have a cuda context - // - static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket); +private: + // Callback for handling packages of demuxed video data. + // + // Parameters: + // pUserData - Pointer to user data. We must pass a pointer to a + // VideoSourceData struct here, that contains a valid CUvideoparser + // and FrameQueue. + // pPacket - video-source data packet. + // + // NOTE: called from a different thread that doesn't not have a cuda context + // + static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket); - CUvideosource videoSource_; - VideoReader_GPU::FormatInfo format_; - }; - } -}} + CUvideosource videoSource_; + VideoReader_GPU::FormatInfo format_; +}; -#endif // defined(HAVE_CUDA) && !defined(__APPLE__) +}}} #endif // __CUVUD_VIDEO_SOURCE_H__ diff --git a/modules/gpu/src/ffmpeg_video_source.cpp b/modules/gpucodec/src/ffmpeg_video_source.cpp similarity index 94% rename from modules/gpu/src/ffmpeg_video_source.cpp rename to modules/gpucodec/src/ffmpeg_video_source.cpp index 16cd7b64e..6ba09284d 100644 --- a/modules/gpu/src/ffmpeg_video_source.cpp +++ b/modules/gpucodec/src/ffmpeg_video_source.cpp @@ -40,14 +40,12 @@ // //M*/ -#include "ffmpeg_video_source.h" +#include "precomp.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#ifdef HAVE_NVCUVID -#if defined(HAVE_FFMPEG) && defined(BUILD_SHARED_LIBS) +#if defined(HAVE_FFMPEG) && defined(BUILD_SHARED_LIBS) && !defined(WIN32) #include "../src/cap_ffmpeg_impl.hpp" -#else - #include "../src/cap_ffmpeg_api.hpp" #endif namespace @@ -116,11 +114,6 @@ cv::gpu::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname) : format_.height = height; } -cv::gpu::detail::FFmpegVideoSource::~FFmpegVideoSource() -{ - release_InputMediaStream_FFMPEG_p(stream_); -} - cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::FFmpegVideoSource::format() const { return format_; @@ -130,14 +123,14 @@ void cv::gpu::detail::FFmpegVideoSource::start() { stop_ = false; hasError_ = false; - thread_.reset(new Thread(readLoop, this)); + thread_ = new Thread(readLoop, this); } void cv::gpu::detail::FFmpegVideoSource::stop() { stop_ = true; thread_->wait(); - thread_.reset(); + thread_.release(); } bool cv::gpu::detail::FFmpegVideoSource::isStarted() const @@ -179,4 +172,9 @@ void cv::gpu::detail::FFmpegVideoSource::readLoop(void* userData) thiz->parseVideoData(0, 0, true); } +template <> void cv::Ptr::delete_obj() +{ + if (obj) release_InputMediaStream_FFMPEG_p(obj); +} + #endif // HAVE_CUDA diff --git a/modules/gpu/src/ffmpeg_video_source.h b/modules/gpucodec/src/ffmpeg_video_source.h similarity index 69% rename from modules/gpu/src/ffmpeg_video_source.h rename to modules/gpucodec/src/ffmpeg_video_source.h index a2ba40ccc..d097785d7 100644 --- a/modules/gpu/src/ffmpeg_video_source.h +++ b/modules/gpucodec/src/ffmpeg_video_source.h @@ -43,46 +43,40 @@ #ifndef __FFMPEG_VIDEO_SOURCE_H__ #define __FFMPEG_VIDEO_SOURCE_H__ -#include "precomp.hpp" -#include "thread_wrappers.h" - -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#include "opencv2/gpucodec.hpp" +#include "thread.h" struct InputMediaStream_FFMPEG; -namespace cv { namespace gpu +namespace cv { namespace gpu { namespace detail { + +class FFmpegVideoSource : public VideoReader_GPU::VideoSource { - namespace detail - { - class FFmpegVideoSource : public VideoReader_GPU::VideoSource - { - public: - FFmpegVideoSource(const String& fname); - ~FFmpegVideoSource(); +public: + FFmpegVideoSource(const String& fname); - VideoReader_GPU::FormatInfo format() const; - void start(); - void stop(); - bool isStarted() const; - bool hasError() const; + VideoReader_GPU::FormatInfo format() const; + void start(); + void stop(); + bool isStarted() const; + bool hasError() const; - private: - FFmpegVideoSource(const FFmpegVideoSource&); - FFmpegVideoSource& operator =(const FFmpegVideoSource&); +private: + VideoReader_GPU::FormatInfo format_; - VideoReader_GPU::FormatInfo format_; + cv::Ptr stream_; - InputMediaStream_FFMPEG* stream_; + cv::Ptr thread_; + volatile bool stop_; + volatile bool hasError_; - std::auto_ptr thread_; - volatile bool stop_; - volatile bool hasError_; + static void readLoop(void* userData); +}; - static void readLoop(void* userData); - }; - } -}} +}}} -#endif // HAVE_CUDA +namespace cv { + template <> void Ptr::delete_obj(); +} -#endif // __CUVUD_VIDEO_SOURCE_H__ +#endif // __FFMPEG_VIDEO_SOURCE_H__ diff --git a/modules/gpu/src/frame_queue.cpp b/modules/gpucodec/src/frame_queue.cpp similarity index 94% rename from modules/gpu/src/frame_queue.cpp rename to modules/gpucodec/src/frame_queue.cpp index a8b9cff0b..2c5045500 100644 --- a/modules/gpu/src/frame_queue.cpp +++ b/modules/gpucodec/src/frame_queue.cpp @@ -40,9 +40,9 @@ // //M*/ -#include "frame_queue.h" +#include "precomp.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#ifdef HAVE_NVCUVID cv::gpu::detail::FrameQueue::FrameQueue() : endOfDecode_(0), @@ -79,7 +79,7 @@ void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams) bool isFramePlaced = false; { - CriticalSection::AutoLock autoLock(criticalSection_); + AutoLock autoLock(mtx_); if (framesInQueue_ < MaximumSize) { @@ -100,7 +100,7 @@ void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams) bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo) { - CriticalSection::AutoLock autoLock(criticalSection_); + AutoLock autoLock(mtx_); if (framesInQueue_ > 0) { @@ -114,4 +114,4 @@ bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo) return false; } -#endif // HAVE_CUDA +#endif // HAVE_NVCUVID diff --git a/modules/gpu/src/frame_queue.h b/modules/gpucodec/src/frame_queue.h similarity index 55% rename from modules/gpu/src/frame_queue.h rename to modules/gpucodec/src/frame_queue.h index e408b0dd0..51e3bcedb 100644 --- a/modules/gpu/src/frame_queue.h +++ b/modules/gpucodec/src/frame_queue.h @@ -43,61 +43,55 @@ #ifndef __FRAME_QUEUE_H__ #define __FRAME_QUEUE_H__ -#include "precomp.hpp" -#include "thread_wrappers.h" +#include "opencv2/core/utility.hpp" +#include "opencv2/core/gpu_private.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#include -namespace cv { namespace gpu +namespace cv { namespace gpu { namespace detail { - namespace detail - { - class FrameQueue - { - public: - static const int MaximumSize = 20; // MAX_FRM_CNT; - FrameQueue(); +class FrameQueue +{ +public: + static const int MaximumSize = 20; // MAX_FRM_CNT; - void endDecode() { endOfDecode_ = true; } - bool isEndOfDecode() const { return endOfDecode_ != 0;} + FrameQueue(); - // Spins until frame becomes available or decoding gets canceled. - // If the requested frame is available the method returns true. - // If decoding was interupted before the requested frame becomes - // available, the method returns false. - bool waitUntilFrameAvailable(int pictureIndex); + void endDecode() { endOfDecode_ = true; } + bool isEndOfDecode() const { return endOfDecode_ != 0;} - void enqueue(const CUVIDPARSERDISPINFO* picParams); + // Spins until frame becomes available or decoding gets canceled. + // If the requested frame is available the method returns true. + // If decoding was interupted before the requested frame becomes + // available, the method returns false. + bool waitUntilFrameAvailable(int pictureIndex); - // Deque the next frame. - // Parameters: - // displayInfo - New frame info gets placed into this object. - // Returns: - // true, if a new frame was returned, - // false, if the queue was empty and no new frame could be returned. - bool dequeue(CUVIDPARSERDISPINFO& displayInfo); + void enqueue(const CUVIDPARSERDISPINFO* picParams); - void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = false; } + // Deque the next frame. + // Parameters: + // displayInfo - New frame info gets placed into this object. + // Returns: + // true, if a new frame was returned, + // false, if the queue was empty and no new frame could be returned. + bool dequeue(CUVIDPARSERDISPINFO& displayInfo); - private: - FrameQueue(const FrameQueue&); - FrameQueue& operator =(const FrameQueue&); + void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = false; } - bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; } +private: + bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; } - CriticalSection criticalSection_; + Mutex mtx_; - volatile int isFrameInUse_[MaximumSize]; - volatile int endOfDecode_; + volatile int isFrameInUse_[MaximumSize]; + volatile int endOfDecode_; - int framesInQueue_; - int readPosition_; - CUVIDPARSERDISPINFO displayQueue_[MaximumSize]; - }; - } -}} + int framesInQueue_; + int readPosition_; + CUVIDPARSERDISPINFO displayQueue_[MaximumSize]; +}; -#endif // HAVE_CUDA +}}} #endif // __FRAME_QUEUE_H__ diff --git a/modules/gpucodec/src/precomp.cpp b/modules/gpucodec/src/precomp.cpp new file mode 100644 index 000000000..3c01a2596 --- /dev/null +++ b/modules/gpucodec/src/precomp.cpp @@ -0,0 +1,43 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" diff --git a/modules/gpucodec/src/precomp.hpp b/modules/gpucodec/src/precomp.hpp new file mode 100644 index 000000000..9db176e66 --- /dev/null +++ b/modules/gpucodec/src/precomp.hpp @@ -0,0 +1,79 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_PRECOMP_H__ +#define __OPENCV_PRECOMP_H__ + +#include +#include +#include +#include +#include +#include + +#include "opencv2/gpucodec.hpp" + +#include "opencv2/core/gpu_private.hpp" + +#ifdef HAVE_NVCUVID + #include + + #ifdef WIN32 + #define NOMINMAX + #include + #include + #else + #include + #include + #endif + + #include "thread.h" + #include "ffmpeg_video_source.h" + #include "cuvid_video_source.h" + #include "frame_queue.h" + #include "video_decoder.h" + #include "video_parser.h" + + #include "../src/cap_ffmpeg_api.hpp" +#endif + +#endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/gpucodec/src/thread.cpp b/modules/gpucodec/src/thread.cpp new file mode 100644 index 000000000..db9f3de39 --- /dev/null +++ b/modules/gpucodec/src/thread.cpp @@ -0,0 +1,174 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_NVCUVID + +using namespace cv::gpu::detail; + +#ifdef WIN32 + +namespace +{ + struct UserData + { + Thread::Func func; + void* param; + }; + + DWORD WINAPI WinThreadFunction(LPVOID lpParam) + { + UserData* userData = static_cast(lpParam); + + userData->func(userData->param); + + return 0; + } +} + +class cv::gpu::detail::Thread::Impl +{ +public: + Impl(Thread::Func func, void* userData) + { + userData_.func = func; + userData_.param = userData; + + thread_ = CreateThread( + NULL, // default security attributes + 0, // use default stack size + WinThreadFunction, // thread function name + &userData_, // argument to thread function + 0, // use default creation flags + &threadId_); // returns the thread identifier + } + + ~Impl() + { + CloseHandle(thread_); + } + + void wait() + { + WaitForSingleObject(thread_, INFINITE); + } + +private: + UserData userData_; + HANDLE thread_; + DWORD threadId_; +}; + +#else + +namespace +{ + struct UserData + { + Thread::Func func; + void* param; + }; + + void* PThreadFunction(void* lpParam) + { + UserData* userData = static_cast(lpParam); + + userData->func(userData->param); + + return 0; + } +} + +class cv::gpu::detail::Thread::Impl +{ +public: + Impl(Thread::Func func, void* userData) + { + userData_.func = func; + userData_.param = userData; + + pthread_create(&thread_, NULL, PThreadFunction, &userData_); + } + + ~Impl() + { + pthread_detach(thread_); + } + + void wait() + { + pthread_join(thread_, NULL); + } + +private: + pthread_t thread_; + UserData userData_; +}; + +#endif + +cv::gpu::detail::Thread::Thread(Func func, void* userData) : + impl_(new Impl(func, userData)) +{ +} + +void cv::gpu::detail::Thread::wait() +{ + impl_->wait(); +} + +void cv::gpu::detail::Thread::sleep(int ms) +{ +#ifdef WIN32 + ::Sleep(ms); +#else + ::usleep(ms * 1000); +#endif +} + +template <> void cv::Ptr::delete_obj() +{ + if (obj) delete obj; +} + +#endif // HAVE_NVCUVID diff --git a/modules/gpu/src/thread_wrappers.h b/modules/gpucodec/src/thread.h similarity index 61% rename from modules/gpu/src/thread_wrappers.h rename to modules/gpucodec/src/thread.h index da811737d..1489f5830 100644 --- a/modules/gpu/src/thread_wrappers.h +++ b/modules/gpucodec/src/thread.h @@ -43,70 +43,31 @@ #ifndef __THREAD_WRAPPERS_H__ #define __THREAD_WRAPPERS_H__ -#include "precomp.hpp" +#include "opencv2/core.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +namespace cv { namespace gpu { namespace detail { -namespace cv { namespace gpu +class Thread { - namespace detail - { - class CriticalSection - { - public: - CriticalSection(); - ~CriticalSection(); +public: + typedef void (*Func)(void* userData); - void enter(); - void leave(); + explicit Thread(Func func, void* userData = 0); - class AutoLock - { - public: - explicit AutoLock(CriticalSection& criticalSection) : - criticalSection_(criticalSection) - { - criticalSection_.enter(); - } + void wait(); - ~AutoLock() - { - criticalSection_.leave(); - } + static void sleep(int ms); - private: - CriticalSection& criticalSection_; - }; + class Impl; - private: - CriticalSection(const CriticalSection&); - CriticalSection& operator=(const CriticalSection&); +private: + cv::Ptr impl_; +}; - class Impl; - std::auto_ptr impl_; - }; +}}} - class Thread - { - public: - explicit Thread(void (*func)(void* userData), void* userData = 0); - ~Thread(); - - void wait(); - - static void sleep(int ms); - - private: - Thread(const Thread&); - Thread& operator=(const Thread&); - - class Impl; - std::auto_ptr impl_; - }; - - } -}} - -#endif // HAVE_CUDA +namespace cv { + template <> void Ptr::delete_obj(); +} #endif // __THREAD_WRAPPERS_H__ diff --git a/modules/gpu/src/video_decoder.cpp b/modules/gpucodec/src/video_decoder.cpp similarity index 97% rename from modules/gpu/src/video_decoder.cpp rename to modules/gpucodec/src/video_decoder.cpp index fe897895d..7e28e872b 100644 --- a/modules/gpu/src/video_decoder.cpp +++ b/modules/gpucodec/src/video_decoder.cpp @@ -40,10 +40,9 @@ // //M*/ -#include "video_decoder.h" -#include "frame_queue.h" +#include "precomp.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#ifdef HAVE_NVCUVID void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& videoFormat) { @@ -113,4 +112,4 @@ void cv::gpu::detail::VideoDecoder::release() } } -#endif // HAVE_CUDA +#endif // HAVE_NVCUVID diff --git a/modules/gpucodec/src/video_decoder.h b/modules/gpucodec/src/video_decoder.h new file mode 100644 index 000000000..e31ec1a0d --- /dev/null +++ b/modules/gpucodec/src/video_decoder.h @@ -0,0 +1,111 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __VIDEO_DECODER_H__ +#define __VIDEO_DECODER_H__ + +#include "opencv2/core/gpu_private.hpp" +#include "opencv2/gpucodec.hpp" + +#include + +namespace cv { namespace gpu { namespace detail +{ + +class VideoDecoder +{ +public: + VideoDecoder(const VideoReader_GPU::FormatInfo& videoFormat, CUvideoctxlock lock) : lock_(lock), decoder_(0) + { + create(videoFormat); + } + + ~VideoDecoder() + { + release(); + } + + void create(const VideoReader_GPU::FormatInfo& videoFormat); + void release(); + + // Get the code-type currently used. + cudaVideoCodec codec() const { return createInfo_.CodecType; } + unsigned long maxDecodeSurfaces() const { return createInfo_.ulNumDecodeSurfaces; } + + unsigned long frameWidth() const { return createInfo_.ulWidth; } + unsigned long frameHeight() const { return createInfo_.ulHeight; } + + unsigned long targetWidth() const { return createInfo_.ulTargetWidth; } + unsigned long targetHeight() const { return createInfo_.ulTargetHeight; } + + cudaVideoChromaFormat chromaFormat() const { return createInfo_.ChromaFormat; } + + bool decodePicture(CUVIDPICPARAMS* picParams) + { + return cuvidDecodePicture(decoder_, picParams) == CUDA_SUCCESS; + } + + cv::gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams) + { + CUdeviceptr ptr; + unsigned int pitch; + + cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) ); + + return GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch); + } + + void unmapFrame(cv::gpu::GpuMat& frame) + { + cuSafeCall( cuvidUnmapVideoFrame(decoder_, (CUdeviceptr) frame.data) ); + frame.release(); + } + +private: + CUvideoctxlock lock_; + CUVIDDECODECREATEINFO createInfo_; + CUvideodecoder decoder_; +}; + +}}} + +#endif // __VIDEO_DECODER_H__ diff --git a/modules/gpu/src/video_parser.cpp b/modules/gpucodec/src/video_parser.cpp similarity index 98% rename from modules/gpu/src/video_parser.cpp rename to modules/gpucodec/src/video_parser.cpp index ab96d12b9..620f85fe8 100644 --- a/modules/gpu/src/video_parser.cpp +++ b/modules/gpucodec/src/video_parser.cpp @@ -40,9 +40,9 @@ // //M*/ -#include "video_parser.h" +#include "precomp.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#ifdef HAVE_NVCUVID cv::gpu::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue) : videoDecoder_(videoDecoder), frameQueue_(frameQueue), unparsedPackets_(0), hasError_(false) @@ -158,4 +158,4 @@ int CUDAAPI cv::gpu::detail::VideoParser::HandlePictureDisplay(void* userData, C return true; } -#endif // HAVE_CUDA +#endif // HAVE_NVCUVID diff --git a/modules/gpu/src/video_parser.h b/modules/gpucodec/src/video_parser.h similarity index 54% rename from modules/gpu/src/video_parser.h rename to modules/gpucodec/src/video_parser.h index 15700664b..a26dd3eb7 100644 --- a/modules/gpu/src/video_parser.h +++ b/modules/gpucodec/src/video_parser.h @@ -43,58 +43,52 @@ #ifndef __VIDEO_PARSER_H__ #define __VIDEO_PARSER_H__ -#include "precomp.hpp" - +#include "opencv2/core/gpu_private.hpp" +#include "opencv2/gpucodec.hpp" #include "frame_queue.h" #include "video_decoder.h" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#include -namespace cv { namespace gpu +namespace cv { namespace gpu { namespace detail { - namespace detail + +class VideoParser +{ +public: + VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue); + + ~VideoParser() { - class VideoParser - { - public: - VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue); - - ~VideoParser() - { - cuvidDestroyVideoParser(parser_); - } - - bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream); - - bool hasError() const { return hasError_; } - - private: - VideoParser(const VideoParser&); - VideoParser& operator =(const VideoParser&); - - VideoDecoder* videoDecoder_; - FrameQueue* frameQueue_; - CUvideoparser parser_; - int unparsedPackets_; - volatile bool hasError_; - - // Called when the decoder encounters a video format change (or initial sequence header) - // This particular implementation of the callback returns 0 in case the video format changes - // to something different than the original format. Returning 0 causes a stop of the app. - static int CUDAAPI HandleVideoSequence(void* pUserData, CUVIDEOFORMAT* pFormat); - - // Called by the video parser to decode a single picture - // Since the parser will deliver data as fast as it can, we need to make sure that the picture - // index we're attempting to use for decode is no longer used for display - static int CUDAAPI HandlePictureDecode(void* pUserData, CUVIDPICPARAMS* pPicParams); - - // Called by the video parser to display a video frame (in the case of field pictures, there may be - // 2 decode calls per 1 display call, since two fields make up one frame) - static int CUDAAPI HandlePictureDisplay(void* pUserData, CUVIDPARSERDISPINFO* pPicParams); - }; + cuvidDestroyVideoParser(parser_); } -}} -#endif // HAVE_CUDA + bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream); + + bool hasError() const { return hasError_; } + +private: + VideoDecoder* videoDecoder_; + FrameQueue* frameQueue_; + CUvideoparser parser_; + int unparsedPackets_; + volatile bool hasError_; + + // Called when the decoder encounters a video format change (or initial sequence header) + // This particular implementation of the callback returns 0 in case the video format changes + // to something different than the original format. Returning 0 causes a stop of the app. + static int CUDAAPI HandleVideoSequence(void* pUserData, CUVIDEOFORMAT* pFormat); + + // Called by the video parser to decode a single picture + // Since the parser will deliver data as fast as it can, we need to make sure that the picture + // index we're attempting to use for decode is no longer used for display + static int CUDAAPI HandlePictureDecode(void* pUserData, CUVIDPICPARAMS* pPicParams); + + // Called by the video parser to display a video frame (in the case of field pictures, there may be + // 2 decode calls per 1 display call, since two fields make up one frame) + static int CUDAAPI HandlePictureDisplay(void* pUserData, CUVIDPARSERDISPINFO* pPicParams); +}; + +}}} #endif // __VIDEO_PARSER_H__ diff --git a/modules/gpu/src/video_reader.cpp b/modules/gpucodec/src/video_reader.cpp similarity index 89% rename from modules/gpu/src/video_reader.cpp rename to modules/gpucodec/src/video_reader.cpp index 7bc63dae3..dbb4bbcf2 100644 --- a/modules/gpu/src/video_reader.cpp +++ b/modules/gpucodec/src/video_reader.cpp @@ -42,7 +42,7 @@ #include "precomp.hpp" -#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) || !defined(HAVE_NVCUVID) +#ifndef HAVE_NVCUVID class cv::gpu::VideoReader_GPU::Impl { @@ -61,14 +61,7 @@ cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::VideoReader_GPU::format() const { bool cv::gpu::VideoReader_GPU::VideoSource::parseVideoData(const unsigned char*, size_t, bool) { throw_no_cuda(); return false; } void cv::gpu::VideoReader_GPU::dumpFormat(std::ostream&) { throw_no_cuda(); } -#else // HAVE_CUDA - -#include "frame_queue.h" -#include "video_decoder.h" -#include "video_parser.h" - -#include "cuvid_video_source.h" -#include "ffmpeg_video_source.h" +#else // HAVE_NVCUVID class cv::gpu::VideoReader_GPU::Impl { @@ -81,14 +74,11 @@ public: cv::gpu::VideoReader_GPU::FormatInfo format() const { return videoSource_->format(); } private: - Impl(const Impl&); - Impl& operator =(const Impl&); - cv::Ptr videoSource_; - std::auto_ptr frameQueue_; - std::auto_ptr videoDecoder_; - std::auto_ptr videoParser_; + cv::Ptr frameQueue_; + cv::Ptr videoDecoder_; + cv::Ptr videoParser_; CUvideoctxlock lock_; @@ -110,12 +100,12 @@ cv::gpu::VideoReader_GPU::Impl::Impl(const cv::Ptr& source) : cuSafeCall( cuCtxGetCurrent(&ctx) ); cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) ); - frameQueue_.reset(new detail::FrameQueue); - videoDecoder_.reset(new detail::VideoDecoder(videoSource_->format(), lock_)); - videoParser_.reset(new detail::VideoParser(videoDecoder_.get(), frameQueue_.get())); + frameQueue_ = new detail::FrameQueue; + videoDecoder_ = new detail::VideoDecoder(videoSource_->format(), lock_); + videoParser_ = new detail::VideoParser(videoDecoder_, frameQueue_); - videoSource_->setFrameQueue(frameQueue_.get()); - videoSource_->setVideoParser(videoParser_.get()); + videoSource_->setFrameQueue(frameQueue_); + videoSource_->setVideoParser(videoParser_); videoSource_->start(); } @@ -126,12 +116,10 @@ cv::gpu::VideoReader_GPU::Impl::~Impl() videoSource_->stop(); } -namespace cv { namespace gpu { namespace cudev { - namespace video_decoding - { - void loadHueCSC(float hueCSC[9]); - void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz interopFrame, cudaStream_t stream = 0); - } +namespace cv { namespace gpu { namespace cudev +{ + void loadHueCSC(float hueCSC[9]); + void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz interopFrame, cudaStream_t stream = 0); }}} namespace @@ -187,7 +175,7 @@ namespace void cudaPostProcessFrame(const cv::gpu::GpuMat& decodedFrame, cv::gpu::GpuMat& interopFrame, int width, int height) { - using namespace cv::gpu::cudev::video_decoding; + using namespace cv::gpu::cudev; static bool updateCSC = true; static float hueColorSpaceMat[9]; @@ -210,7 +198,7 @@ namespace loadHueCSC(hueColorSpaceMat); - NV12ToARGB_gpu(decodedFrame, interopFrame); + NV12_to_RGB(decodedFrame, interopFrame); } } @@ -329,17 +317,17 @@ void cv::gpu::VideoReader_GPU::open(const cv::Ptr& source) { CV_Assert( !source.empty() ); close(); - impl_.reset(new Impl(source)); + impl_ = new Impl(source); } bool cv::gpu::VideoReader_GPU::isOpened() const { - return impl_.get() != 0; + return !impl_.empty(); } void cv::gpu::VideoReader_GPU::close() { - impl_.reset(); + impl_.release(); } bool cv::gpu::VideoReader_GPU::read(GpuMat& image) @@ -396,4 +384,9 @@ void cv::gpu::VideoReader_GPU::dumpFormat(std::ostream& st) st << "Chroma Format : " << chromas[_format.chromaFormat] << std::endl; } -#endif // HAVE_CUDA +#endif // HAVE_NVCUVID + +template <> void cv::Ptr::delete_obj() +{ + if (obj) delete obj; +} diff --git a/modules/gpu/src/video_writer.cpp b/modules/gpucodec/src/video_writer.cpp similarity index 97% rename from modules/gpu/src/video_writer.cpp rename to modules/gpucodec/src/video_writer.cpp index 987be9727..94100c0b8 100644 --- a/modules/gpu/src/video_writer.cpp +++ b/modules/gpucodec/src/video_writer.cpp @@ -42,7 +42,7 @@ #include "precomp.hpp" -#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) || !defined(HAVE_NVCUVID) || !defined(WIN32) +#if !defined(HAVE_NVCUVID) || !defined(WIN32) class cv::gpu::VideoWriter_GPU::Impl { @@ -70,13 +70,6 @@ void cv::gpu::VideoWriter_GPU::EncoderParams::save(const String&) const { throw_ #else // !defined HAVE_CUDA || !defined WIN32 -#ifdef HAVE_FFMPEG - #include "../src/cap_ffmpeg_impl.hpp" -#else - #include "../src/cap_ffmpeg_api.hpp" -#endif - - /////////////////////////////////////////////////////////////////////////// // VideoWriter_GPU::Impl @@ -91,7 +84,7 @@ namespace err = NVGetHWEncodeCaps(); if (err) - CV_Error(CV_GpuNotSupported, "No CUDA capability present"); + CV_Error(cv::Error::GpuNotSupported, "No CUDA capability present"); // Create the Encoder API Interface err = NVCreateEncoder(&encoder_); @@ -212,7 +205,7 @@ void cv::gpu::VideoWriter_GPU::Impl::initEncoder(double fps) }; err = NVSetCodec(encoder_, codecs_id[codec_]); if (err) - CV_Error(CV_StsNotImplemented, "Codec format is not supported"); + CV_Error(cv::Error::StsNotImplemented, "Codec format is not supported"); // Set default params @@ -501,14 +494,6 @@ void cv::gpu::VideoWriter_GPU::Impl::createHWEncoder() CV_Assert( err == 0 ); } -namespace cv { namespace gpu { namespace cudev -{ - namespace video_encoding - { - void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst); - } -}}} - namespace { // UYVY/YUY2 are both 4:2:2 formats (16bpc) @@ -644,6 +629,11 @@ namespace } } +namespace cv { namespace gpu { namespace cudev +{ + void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0); +}}} + void cv::gpu::VideoWriter_GPU::Impl::write(const cv::gpu::GpuMat& frame, bool lastFrame) { if (inputFormat_ == SF_BGR) @@ -674,7 +664,7 @@ void cv::gpu::VideoWriter_GPU::Impl::write(const cv::gpu::GpuMat& frame, bool la CV_Assert( res == CUDA_SUCCESS ); if (inputFormat_ == SF_BGR) - cv::gpu::cudev::video_encoding::YV12_gpu(frame, frame.channels(), videoFrame_); + cv::gpu::cudev::RGB_to_YV12(frame, frame.channels(), videoFrame_); else { switch (surfaceFormat_) @@ -829,11 +819,14 @@ void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size) void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType) { + (void) frameNumber; isKeyFrame_ = picType == IFRAME; } void EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType) { + (void) frameNumber; + (void) picType; } /////////////////////////////////////////////////////////////////////////// @@ -885,23 +878,23 @@ void cv::gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, void cv::gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format) { close(); - impl_.reset(new Impl(encoderCallback, frameSize, fps, format)); + impl_ = new Impl(encoderCallback, frameSize, fps, format); } void cv::gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) { close(); - impl_.reset(new Impl(encoderCallback, frameSize, fps, params, format)); + impl_ = new Impl(encoderCallback, frameSize, fps, params, format); } bool cv::gpu::VideoWriter_GPU::isOpened() const { - return impl_.get() != 0; + return !impl_.empty(); } void cv::gpu::VideoWriter_GPU::close() { - impl_.reset(); + impl_.release(); } void cv::gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame) @@ -1002,3 +995,8 @@ void cv::gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) con } #endif // !defined HAVE_CUDA || !defined WIN32 + +template <> void cv::Ptr::delete_obj() +{ + if (obj) delete obj; +} diff --git a/modules/gpucodec/test/test_main.cpp b/modules/gpucodec/test/test_main.cpp new file mode 100644 index 000000000..958adfee5 --- /dev/null +++ b/modules/gpucodec/test/test_main.cpp @@ -0,0 +1,45 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +CV_TEST_MAIN("gpu") diff --git a/modules/gpucodec/test/test_precomp.cpp b/modules/gpucodec/test/test_precomp.cpp new file mode 100644 index 000000000..0fb652180 --- /dev/null +++ b/modules/gpucodec/test/test_precomp.cpp @@ -0,0 +1,43 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" diff --git a/modules/gpucodec/test/test_precomp.hpp b/modules/gpucodec/test/test_precomp.hpp new file mode 100644 index 000000000..0dc79935d --- /dev/null +++ b/modules/gpucodec/test/test_precomp.hpp @@ -0,0 +1,60 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + +#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/ts/gpu_test.hpp" + +#include "opencv2/gpucodec.hpp" +#include "opencv2/highgui.hpp" + +#endif diff --git a/modules/gpu/test/test_video.cpp b/modules/gpucodec/test/test_video.cpp similarity index 75% rename from modules/gpu/test/test_video.cpp rename to modules/gpucodec/test/test_video.cpp index f28cd3cf4..55fc3f87c 100644 --- a/modules/gpu/test/test_video.cpp +++ b/modules/gpucodec/test/test_video.cpp @@ -42,29 +42,21 @@ #include "test_precomp.hpp" -#if defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#ifdef HAVE_NVCUVID + +PARAM_TEST_CASE(Video, cv::gpu::DeviceInfo, std::string) +{ +}; ////////////////////////////////////////////////////// // VideoReader -PARAM_TEST_CASE(VideoReader, cv::gpu::DeviceInfo, std::string) +GPU_TEST_P(Video, Reader) { - cv::gpu::DeviceInfo devInfo; - std::string inputFile; + cv::gpu::setDevice(GET_PARAM(0).deviceID()); - virtual void SetUp() - { - devInfo = GET_PARAM(0); - inputFile = GET_PARAM(1); + const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1); - cv::gpu::setDevice(devInfo.deviceID()); - - inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + inputFile; - } -}; - -GPU_TEST_P(VideoReader, Regression) -{ cv::gpu::VideoReader_GPU reader(inputFile); ASSERT_TRUE(reader.isOpened()); @@ -80,33 +72,17 @@ GPU_TEST_P(VideoReader, Regression) ASSERT_FALSE(reader.isOpened()); } -INSTANTIATE_TEST_CASE_P(GPU_Video, VideoReader, testing::Combine( - ALL_DEVICES, - testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")))); - ////////////////////////////////////////////////////// // VideoWriter #ifdef WIN32 -PARAM_TEST_CASE(VideoWriter, cv::gpu::DeviceInfo, std::string) +GPU_TEST_P(Video, Writer) { - cv::gpu::DeviceInfo devInfo; - std::string inputFile; + cv::gpu::setDevice(GET_PARAM(0).deviceID()); - virtual void SetUp() - { - devInfo = GET_PARAM(0); - inputFile = GET_PARAM(1); + const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1); - cv::gpu::setDevice(devInfo.deviceID()); - - inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + std::string("video/") + inputFile; - } -}; - -GPU_TEST_P(VideoWriter, Regression) -{ std::string outputFile = cv::tempfile(".avi"); const double FPS = 25.0; @@ -144,10 +120,10 @@ GPU_TEST_P(VideoWriter, Regression) } } -INSTANTIATE_TEST_CASE_P(GPU_Video, VideoWriter, testing::Combine( +#endif // WIN32 + +INSTANTIATE_TEST_CASE_P(GPU_Codec, Video, testing::Combine( ALL_DEVICES, testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")))); -#endif // WIN32 - -#endif // defined(HAVE_CUDA) && defined(HAVE_NVCUVID) +#endif // HAVE_NVCUVID diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt index 6c6022c72..d111a79eb 100644 --- a/modules/superres/CMakeLists.txt +++ b/modules/superres/CMakeLists.txt @@ -4,4 +4,4 @@ endif() set(the_description "Super Resolution") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef) -ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui) +ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui opencv_gpucodec) diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp index 5e6ed0a2b..cba2b14ea 100644 --- a/modules/superres/src/frame_source.cpp +++ b/modules/superres/src/frame_source.cpp @@ -187,7 +187,7 @@ Ptr cv::superres::createFrameSource_Camera(int deviceId) ////////////////////////////////////////////////////// // VideoFrameSource_GPU -#ifndef HAVE_OPENCV_GPU +#ifndef HAVE_OPENCV_GPUCODEC Ptr cv::superres::createFrameSource_Video_GPU(const String& fileName) { @@ -196,7 +196,7 @@ Ptr cv::superres::createFrameSource_Video_GPU(const String& fileNam return Ptr(); } -#else // HAVE_OPENCV_GPU +#else // HAVE_OPENCV_GPUCODEC namespace { @@ -250,4 +250,4 @@ Ptr cv::superres::createFrameSource_Video_GPU(const String& fileNam return new VideoFrameSource(fileName); } -#endif // HAVE_OPENCV_GPU +#endif // HAVE_OPENCV_GPUCODEC diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp index 429bd485a..960d9b71c 100644 --- a/modules/superres/src/precomp.hpp +++ b/modules/superres/src/precomp.hpp @@ -60,6 +60,10 @@ # include "opencv2/core/gpu_private.hpp" #endif +#ifdef HAVE_OPENCV_GPUCODEC +# include "opencv2/gpucodec.hpp" +#endif + #ifdef HAVE_OPENCV_HIGHGUI #include "opencv2/highgui.hpp" #endif diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index ee5910630..015df939b 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -1,7 +1,8 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc opencv_highgui opencv_ml opencv_video opencv_objdetect opencv_features2d opencv_calib3d opencv_legacy opencv_contrib opencv_gpu - opencv_nonfree opencv_softcascade opencv_superres) + opencv_nonfree opencv_softcascade opencv_superres + opencv_gpucodec) ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS}) diff --git a/samples/gpu/video_reader.cpp b/samples/gpu/video_reader.cpp index a4c068516..f9bbbbb33 100644 --- a/samples/gpu/video_reader.cpp +++ b/samples/gpu/video_reader.cpp @@ -4,11 +4,11 @@ #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include int main(int argc, const char* argv[]) { diff --git a/samples/gpu/video_writer.cpp b/samples/gpu/video_writer.cpp index 797656752..aed76e04e 100644 --- a/samples/gpu/video_writer.cpp +++ b/samples/gpu/video_writer.cpp @@ -2,10 +2,10 @@ #include #include -#include "opencv2/core/core.hpp" -#include "opencv2/gpu/gpu.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/contrib/contrib.hpp" +#include "opencv2/core.hpp" +#include "opencv2/gpucodec.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/contrib.hpp" int main(int argc, const char* argv[]) { From 31c8b527c618112aa132f2200e388bb1b8115883 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 17 Apr 2013 17:39:17 +0400 Subject: [PATCH 02/49] gpuarithm module for arithmetics operations on matrices --- .../include/opencv2/core/cuda/emulation.hpp | 125 +- modules/gpu/CMakeLists.txt | 6 +- modules/gpu/doc/gpu.rst | 3 - modules/gpu/doc/image_processing.rst | 22 - modules/gpu/include/opencv2/gpu.hpp | 239 +- modules/gpu/src/cuda/element_operations.cu | 2636 ----------------- modules/gpu/src/cuda/matrix_reductions.cu | 1366 --------- modules/gpu/src/cuda/safe_call.hpp | 10 - modules/gpu/src/error.cpp | 23 - modules/gpu/src/imgproc.cpp | 74 + modules/gpu/src/precomp.hpp | 4 - modules/gpuarithm/CMakeLists.txt | 13 + modules/gpuarithm/doc/gpuarithm.rst | 10 + .../doc/matrix_reductions.rst | 0 .../doc/operations_on_matrices.rst | 0 .../doc/per_element_operations.rst | 22 + .../gpuarithm/include/opencv2/gpuarithm.hpp | 279 ++ modules/{gpu => gpuarithm}/perf/perf_core.cpp | 0 modules/gpuarithm/perf/perf_main.cpp | 47 + modules/gpuarithm/perf/perf_precomp.cpp | 43 + modules/gpuarithm/perf/perf_precomp.hpp | 64 + modules/{gpu => gpuarithm}/src/arithm.cpp | 86 +- modules/gpuarithm/src/cuda/absdiff_mat.cu | 147 + modules/gpuarithm/src/cuda/absdiff_scalar.cu | 98 + modules/gpuarithm/src/cuda/add_mat.cu | 185 ++ modules/gpuarithm/src/cuda/add_scalar.cu | 148 + modules/gpuarithm/src/cuda/add_weighted.cu | 364 +++ .../gpuarithm/src/cuda/arithm_func_traits.hpp | 145 + modules/gpuarithm/src/cuda/bitwise_mat.cu | 126 + modules/gpuarithm/src/cuda/bitwise_scalar.cu | 104 + modules/gpuarithm/src/cuda/cmp_mat.cu | 206 ++ modules/gpuarithm/src/cuda/cmp_scalar.cu | 284 ++ modules/gpuarithm/src/cuda/countnonzero.cu | 175 ++ modules/gpuarithm/src/cuda/div_inv.cu | 144 + modules/gpuarithm/src/cuda/div_mat.cu | 230 ++ modules/gpuarithm/src/cuda/div_scalar.cu | 144 + modules/gpuarithm/src/cuda/math.cu | 302 ++ modules/gpuarithm/src/cuda/minmax.cu | 246 ++ modules/gpuarithm/src/cuda/minmax_mat.cu | 228 ++ modules/gpuarithm/src/cuda/minmaxloc.cu | 235 ++ modules/gpuarithm/src/cuda/mul_mat.cu | 211 ++ modules/gpuarithm/src/cuda/mul_scalar.cu | 144 + .../src/cuda/polar_cart.cu} | 0 modules/gpuarithm/src/cuda/reduce.cu | 330 +++ .../src/cuda/split_merge.cu | 0 modules/gpuarithm/src/cuda/sub_mat.cu | 185 ++ modules/gpuarithm/src/cuda/sub_scalar.cu | 148 + modules/gpuarithm/src/cuda/sum.cu | 380 +++ modules/gpuarithm/src/cuda/threshold.cu | 114 + modules/gpuarithm/src/cuda/transpose.cu | 122 + modules/gpuarithm/src/cuda/unroll_detail.hpp | 135 + .../src/element_operations.cpp | 218 +- .../src/matrix_reductions.cpp | 0 modules/gpuarithm/src/precomp.cpp | 43 + modules/gpuarithm/src/precomp.hpp | 58 + .../{gpu => gpuarithm}/src/split_merge.cpp | 0 modules/{gpu => gpuarithm}/test/test_core.cpp | 0 modules/gpuarithm/test/test_main.cpp | 120 + modules/gpuarithm/test/test_precomp.cpp | 43 + modules/gpuarithm/test/test_precomp.hpp | 60 + modules/stitching/CMakeLists.txt | 2 +- modules/superres/CMakeLists.txt | 2 +- samples/cpp/CMakeLists.txt | 1 + samples/gpu/CMakeLists.txt | 2 +- 64 files changed, 6425 insertions(+), 4476 deletions(-) delete mode 100644 modules/gpu/src/cuda/element_operations.cu delete mode 100644 modules/gpu/src/cuda/matrix_reductions.cu create mode 100644 modules/gpuarithm/CMakeLists.txt create mode 100644 modules/gpuarithm/doc/gpuarithm.rst rename modules/{gpu => gpuarithm}/doc/matrix_reductions.rst (100%) rename modules/{gpu => gpuarithm}/doc/operations_on_matrices.rst (100%) rename modules/{gpu => gpuarithm}/doc/per_element_operations.rst (95%) create mode 100644 modules/gpuarithm/include/opencv2/gpuarithm.hpp rename modules/{gpu => gpuarithm}/perf/perf_core.cpp (100%) create mode 100644 modules/gpuarithm/perf/perf_main.cpp create mode 100644 modules/gpuarithm/perf/perf_precomp.cpp create mode 100644 modules/gpuarithm/perf/perf_precomp.hpp rename modules/{gpu => gpuarithm}/src/arithm.cpp (90%) create mode 100644 modules/gpuarithm/src/cuda/absdiff_mat.cu create mode 100644 modules/gpuarithm/src/cuda/absdiff_scalar.cu create mode 100644 modules/gpuarithm/src/cuda/add_mat.cu create mode 100644 modules/gpuarithm/src/cuda/add_scalar.cu create mode 100644 modules/gpuarithm/src/cuda/add_weighted.cu create mode 100644 modules/gpuarithm/src/cuda/arithm_func_traits.hpp create mode 100644 modules/gpuarithm/src/cuda/bitwise_mat.cu create mode 100644 modules/gpuarithm/src/cuda/bitwise_scalar.cu create mode 100644 modules/gpuarithm/src/cuda/cmp_mat.cu create mode 100644 modules/gpuarithm/src/cuda/cmp_scalar.cu create mode 100644 modules/gpuarithm/src/cuda/countnonzero.cu create mode 100644 modules/gpuarithm/src/cuda/div_inv.cu create mode 100644 modules/gpuarithm/src/cuda/div_mat.cu create mode 100644 modules/gpuarithm/src/cuda/div_scalar.cu create mode 100644 modules/gpuarithm/src/cuda/math.cu create mode 100644 modules/gpuarithm/src/cuda/minmax.cu create mode 100644 modules/gpuarithm/src/cuda/minmax_mat.cu create mode 100644 modules/gpuarithm/src/cuda/minmaxloc.cu create mode 100644 modules/gpuarithm/src/cuda/mul_mat.cu create mode 100644 modules/gpuarithm/src/cuda/mul_scalar.cu rename modules/{gpu/src/cuda/mathfunc.cu => gpuarithm/src/cuda/polar_cart.cu} (100%) create mode 100644 modules/gpuarithm/src/cuda/reduce.cu rename modules/{gpu => gpuarithm}/src/cuda/split_merge.cu (100%) create mode 100644 modules/gpuarithm/src/cuda/sub_mat.cu create mode 100644 modules/gpuarithm/src/cuda/sub_scalar.cu create mode 100644 modules/gpuarithm/src/cuda/sum.cu create mode 100644 modules/gpuarithm/src/cuda/threshold.cu create mode 100644 modules/gpuarithm/src/cuda/transpose.cu create mode 100644 modules/gpuarithm/src/cuda/unroll_detail.hpp rename modules/{gpu => gpuarithm}/src/element_operations.cpp (97%) rename modules/{gpu => gpuarithm}/src/matrix_reductions.cpp (100%) create mode 100644 modules/gpuarithm/src/precomp.cpp create mode 100644 modules/gpuarithm/src/precomp.hpp rename modules/{gpu => gpuarithm}/src/split_merge.cpp (100%) rename modules/{gpu => gpuarithm}/test/test_core.cpp (100%) create mode 100644 modules/gpuarithm/test/test_main.cpp create mode 100644 modules/gpuarithm/test/test_precomp.cpp create mode 100644 modules/gpuarithm/test/test_precomp.hpp diff --git a/modules/core/include/opencv2/core/cuda/emulation.hpp b/modules/core/include/opencv2/core/cuda/emulation.hpp index 3df26468b..b484f2378 100644 --- a/modules/core/include/opencv2/core/cuda/emulation.hpp +++ b/modules/core/include/opencv2/core/cuda/emulation.hpp @@ -43,6 +43,7 @@ #ifndef OPENCV_GPU_EMULATION_HPP_ #define OPENCV_GPU_EMULATION_HPP_ +#include "common.hpp" #include "warp_reduce.hpp" namespace cv { namespace gpu { namespace cudev @@ -131,8 +132,130 @@ namespace cv { namespace gpu { namespace cudev return ::atomicMin(address, val); #endif } + }; // struct cmem + + struct glob + { + static __device__ __forceinline__ int atomicAdd(int* address, int val) + { + return ::atomicAdd(address, val); + } + static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val) + { + return ::atomicAdd(address, val); + } + static __device__ __forceinline__ float atomicAdd(float* address, float val) + { + #if __CUDA_ARCH__ >= 200 + return ::atomicAdd(address, val); + #else + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(val + __int_as_float(assumed))); + } while (assumed != old); + return __int_as_float(old); + #endif + } + static __device__ __forceinline__ double atomicAdd(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } + + static __device__ __forceinline__ int atomicMin(int* address, int val) + { + return ::atomicMin(address, val); + } + static __device__ __forceinline__ float atomicMin(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fminf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + (void) address; + (void) val; + return 0.0f; + #endif + } + static __device__ __forceinline__ double atomicMin(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmin(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } + + static __device__ __forceinline__ int atomicMax(int* address, int val) + { + return ::atomicMax(address, val); + } + static __device__ __forceinline__ float atomicMax(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fmaxf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + (void) address; + (void) val; + return 0.0f; + #endif + } + static __device__ __forceinline__ double atomicMax(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } }; - }; + }; //struct Emulation }}} // namespace cv { namespace gpu { namespace cudev #endif /* OPENCV_GPU_EMULATION_HPP_ */ diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 6f2f1145e..2f884b3f9 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -3,7 +3,7 @@ if(ANDROID OR IOS) endif() set(the_description "GPU-accelerated Computer Vision") -ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy) +ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm) ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda") @@ -58,10 +58,6 @@ if(HAVE_CUDA) CUDA_ADD_CUFFT_TO_TARGET(${the_module}) endif() - if(HAVE_CUBLAS) - CUDA_ADD_CUBLAS_TO_TARGET(${the_module}) - endif() - install(FILES src/nvidia/NPP_staging/NPP_staging.hpp src/nvidia/core/NCV.hpp DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2/${name} COMPONENT main) diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst index b21e2abac..f17ed7079 100644 --- a/modules/gpu/doc/gpu.rst +++ b/modules/gpu/doc/gpu.rst @@ -8,10 +8,7 @@ gpu. GPU-accelerated Computer Vision introduction initalization_and_information data_structures - operations_on_matrices - per_element_operations image_processing - matrix_reductions object_detection feature_detection_and_description image_filtering diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst index 7b404c832..69e500374 100644 --- a/modules/gpu/doc/image_processing.rst +++ b/modules/gpu/doc/image_processing.rst @@ -414,28 +414,6 @@ The methods support arbitrary permutations of the original channels, including r -gpu::threshold ------------------- -Applies a fixed-level threshold to each array element. - -.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()) - - :param src: Source array (single-channel). - - :param dst: Destination array with the same size and type as ``src`` . - - :param thresh: Threshold value. - - :param maxval: Maximum value to use with ``THRESH_BINARY`` and ``THRESH_BINARY_INV`` threshold types. - - :param type: Threshold type. For details, see :ocv:func:`threshold` . The ``THRESH_OTSU`` threshold type is not supported. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`threshold` - - - gpu::resize --------------- Resizes an image. diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp index 0b13fc01d..cfad81738 100644 --- a/modules/gpu/include/opencv2/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu.hpp @@ -50,6 +50,7 @@ #endif #include "opencv2/core/gpumat.hpp" +#include "opencv2/gpuarithm.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/objdetect.hpp" #include "opencv2/features2d.hpp" @@ -269,182 +270,8 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& //! supports only ksize = 1 and ksize = 3 CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); +////////////////////////////// Image processing ////////////////////////////// -////////////////////////////// Arithmetics /////////////////////////////////// - -//! implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); - -//! transposes the matrix -//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc) -CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null()); - -//! reverses the order of the rows, columns or both in a matrix -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth -CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null()); - -//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) -//! destination array will have the depth type as lut and the same channels number as source -//! supports CV_8UC1, CV_8UC3 types -CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null()); - -//! makes multi-channel array out of several single-channel arrays -CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null()); - -//! makes multi-channel array out of several single-channel arrays -CV_EXPORTS void merge(const std::vector& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! copies each plane of a multi-channel array to a dedicated array -CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null()); - -//! copies each plane of a multi-channel array to a dedicated array -CV_EXPORTS void split(const GpuMat& src, std::vector& dst, Stream& stream = Stream::Null()); - -//! computes magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type -CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes squared magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type -CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes magnitude of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes squared magnitude of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes angle (angle(i)) of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! converts Cartesian coordinates to polar -//! supports only floating-point source -CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! converts polar coordinates to Cartesian -//! supports only floating-point source -CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values -CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, - int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); -CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b, - int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf); - - -//////////////////////////// Per-element operations //////////////////////////////////// - -//! adds one matrix to another (c = a + b) -CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); -//! adds scalar to a matrix (c = a + s) -CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); - -//! subtracts one matrix from another (c = a - b) -CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); -//! subtracts scalar from a matrix (c = a - s) -CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); - -//! computes element-wise weighted product of the two arrays (c = scale * a * b) -CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! weighted multiplies matrix to a scalar (c = scale * a * s) -CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); - -//! computes element-wise weighted quotient of the two arrays (c = a / b) -CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of matrix and scalar (c = a / s) -CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted reciprocal of an array (dst = scale/src2) -CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null()); - -//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) -CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, - int dtype = -1, Stream& stream = Stream::Null()); - -//! adds scaled array to another one (dst = alpha*src1 + src2) -static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()) -{ - addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); -} - -//! computes element-wise absolute difference of two arrays (c = abs(a - b)) -CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null()); -//! computes element-wise absolute difference of array and scalar (c = abs(a - s)) -CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null()); - -//! computes absolute value of each matrix element -//! supports CV_16S and CV_32F depth -CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes square of each pixel in an image -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes square root of each pixel in an image -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes exponent of each matrix element (b = e**a) -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); - -//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); - -//! computes power of each matrix element: -// (dst(i,j) = pow( src(i,j) , power), if src.type() is integer -// (dst(i,j) = pow(fabs(src(i,j)), power), otherwise -//! supports all, except depth == CV_64F -CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null()); - -//! compares elements of two arrays (c = a b) -CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); -CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); - -//! performs per-elements bit-wise inversion -CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise disjunction of two arrays -CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise disjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise conjunction of two arrays -CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise conjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise "exclusive or" operation -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise "exclusive or" of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! pixel by pixel right shift of an image by a constant value -//! supports 1, 3 and 4 channels images with integers elements -CV_EXPORTS void rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! pixel by pixel left shift of an image by a constant value -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void lshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element minimum of two arrays (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element minimum of array and scalar (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of two arrays (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of array and scalar (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL}; @@ -453,9 +280,6 @@ enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA //! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null()); - -////////////////////////////// Image processing ////////////////////////////// - //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] //! supports only CV_32FC1 map type CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, @@ -521,9 +345,6 @@ CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& strea //! Routines for correcting image color gamma CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null()); -//! applies fixed threshold to the image -CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); - //! resizes the image //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); @@ -794,62 +615,6 @@ private: CannyBuf cannyBuf_; }; -////////////////////////////// Matrix reductions ////////////////////////////// - -//! computes mean value and standard deviation of all or selected array elements -//! supports only CV_8UC1 type -CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev); -//! buffered version -CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf); - -//! computes norm of array -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports all matrices except 64F -CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2); -CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf); -CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf); - -//! computes norm of the difference between two arrays -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports only CV_8UC1 type -CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2); - -//! computes sum of array elements -//! supports only single channel images -CV_EXPORTS Scalar sum(const GpuMat& src); -CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); - -//! computes sum of array elements absolute values -//! supports only single channel images -CV_EXPORTS Scalar absSum(const GpuMat& src); -CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); - -//! computes squared sum of array elements -//! supports only single channel images -CV_EXPORTS Scalar sqrSum(const GpuMat& src); -CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); - -//! finds global minimum and maximum array elements and returns their values -CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()); -CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf); - -//! finds global minimum and maximum array elements and returns their values with locations -CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, - const GpuMat& mask=GpuMat()); -CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, - const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf); - -//! counts non-zero array elements -CV_EXPORTS int countNonZero(const GpuMat& src); -CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf); - -//! reduces a matrix to a vector -CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); - - ///////////////////////////// Calibration 3D ////////////////////////////////// CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu deleted file mode 100644 index 095d8bac0..000000000 --- a/modules/gpu/src/cuda/element_operations.cu +++ /dev/null @@ -1,2636 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/limits.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/simd_functions.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace arithm -{ - template struct ArithmFuncTraits - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 1 }; - }; - - template <> struct ArithmFuncTraits<1, 1> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<1, 2> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<1, 4> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - template <> struct ArithmFuncTraits<2, 1> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<2, 2> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<2, 4> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - template <> struct ArithmFuncTraits<4, 1> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<4, 2> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<4, 4> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; -} - -////////////////////////////////////////////////////////////////////////// -// addMat - -namespace arithm -{ - struct VAdd4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vadd4(a, b); - } - - __device__ __forceinline__ VAdd4() {} - __device__ __forceinline__ VAdd4(const VAdd4& other) {} - }; - - //////////////////////////////////// - - struct VAdd2 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vadd2(a, b); - } - - __device__ __forceinline__ VAdd2() {} - __device__ __forceinline__ VAdd2(const VAdd2& other) {} - }; - - //////////////////////////////////// - - template struct AddMat : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(a + b); - } - - __device__ __forceinline__ AddMat() {} - __device__ __forceinline__ AddMat(const AddMat& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::AddMat > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void addMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream); - } - - void addMat_v2(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream); - } - - template - void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AddMat(), mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AddMat(), WithOutMask(), stream); - } - - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// addScalar - -namespace arithm -{ - template struct AddScalar : unary_function - { - S val; - - explicit AddScalar(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return saturate_cast(a + val); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::AddScalar > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - AddScalar op(static_cast(val)); - - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// subMat - -namespace arithm -{ - struct VSub4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vsub4(a, b); - } - - __device__ __forceinline__ VSub4() {} - __device__ __forceinline__ VSub4(const VSub4& other) {} - }; - - //////////////////////////////////// - - struct VSub2 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vsub2(a, b); - } - - __device__ __forceinline__ VSub2() {} - __device__ __forceinline__ VSub2(const VSub2& other) {} - }; - - //////////////////////////////////// - - template struct SubMat : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(a - b); - } - - __device__ __forceinline__ SubMat() {} - __device__ __forceinline__ SubMat(const SubMat& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::SubMat > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void subMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream); - } - - void subMat_v2(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream); - } - - template - void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, SubMat(), mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, SubMat(), WithOutMask(), stream); - } - - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// subScalar - -namespace arithm -{ - template - void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - AddScalar op(-static_cast(val)); - - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// mulMat - -namespace arithm -{ - struct Mul_8uc4_32f : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, float b) const - { - uint res = 0; - - res |= (saturate_cast((0xffu & (a )) * b) ); - res |= (saturate_cast((0xffu & (a >> 8)) * b) << 8); - res |= (saturate_cast((0xffu & (a >> 16)) * b) << 16); - res |= (saturate_cast((0xffu & (a >> 24)) * b) << 24); - - return res; - } - - __device__ __forceinline__ Mul_8uc4_32f() {} - __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} - }; - - struct Mul_16sc4_32f : binary_function - { - __device__ __forceinline__ short4 operator ()(short4 a, float b) const - { - return make_short4(saturate_cast(a.x * b), saturate_cast(a.y * b), - saturate_cast(a.z * b), saturate_cast(a.w * b)); - } - - __device__ __forceinline__ Mul_16sc4_32f() {} - __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} - }; - - template struct Mul : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(a * b); - } - - __device__ __forceinline__ Mul() {} - __device__ __forceinline__ Mul(const Mul& other) {} - }; - - template struct MulScale : binary_function - { - S scale; - - explicit MulScale(S scale_) : scale(scale_) {} - - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(scale * a * b); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::Mul > : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::MulScale > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void mulMat_8uc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream); - } - - void mulMat_16sc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream); - } - - template - void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) - { - if (scale == 1) - { - Mul op; - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - else - { - MulScale op(static_cast(scale)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - } - - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// mulScalar - -namespace arithm -{ - template struct MulScalar : unary_function - { - S val; - - explicit MulScalar(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return saturate_cast(a * val); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::MulScalar > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - MulScalar op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// divMat - -namespace arithm -{ - struct Div_8uc4_32f : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, float b) const - { - uint res = 0; - - if (b != 0) - { - b = 1.0f / b; - res |= (saturate_cast((0xffu & (a )) * b) ); - res |= (saturate_cast((0xffu & (a >> 8)) * b) << 8); - res |= (saturate_cast((0xffu & (a >> 16)) * b) << 16); - res |= (saturate_cast((0xffu & (a >> 24)) * b) << 24); - } - - return res; - } - }; - - struct Div_16sc4_32f : binary_function - { - __device__ __forceinline__ short4 operator ()(short4 a, float b) const - { - return b != 0 ? make_short4(saturate_cast(a.x / b), saturate_cast(a.y / b), - saturate_cast(a.z / b), saturate_cast(a.w / b)) - : make_short4(0,0,0,0); - } - }; - - template struct Div : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return b != 0 ? saturate_cast(a / b) : 0; - } - - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} - }; - template struct Div : binary_function - { - __device__ __forceinline__ float operator ()(T a, T b) const - { - return b != 0 ? static_cast(a) / b : 0; - } - - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} - }; - template struct Div : binary_function - { - __device__ __forceinline__ double operator ()(T a, T b) const - { - return b != 0 ? static_cast(a) / b : 0; - } - - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} - }; - - template struct DivScale : binary_function - { - S scale; - - explicit DivScale(S scale_) : scale(scale_) {} - - __device__ __forceinline__ D operator ()(T a, T b) const - { - return b != 0 ? saturate_cast(scale * a / b) : 0; - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::Div > : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::DivScale > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void divMat_8uc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream); - } - - void divMat_16sc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream); - } - - template - void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) - { - if (scale == 1) - { - Div op; - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - else - { - DivScale op(static_cast(scale)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - } - - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// divScalar - -namespace arithm -{ - template - void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - MulScalar op(static_cast(1.0 / val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// divInv - -namespace arithm -{ - template struct DivInv : unary_function - { - S val; - - explicit DivInv(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return a != 0 ? saturate_cast(val / a) : 0; - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::DivInv > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - DivInv op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// absDiffMat - -namespace arithm -{ - struct VAbsDiff4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vabsdiff4(a, b); - } - - __device__ __forceinline__ VAbsDiff4() {} - __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} - }; - - //////////////////////////////////// - - struct VAbsDiff2 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vabsdiff2(a, b); - } - - __device__ __forceinline__ VAbsDiff2() {} - __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} - }; - - //////////////////////////////////// - - __device__ __forceinline__ int _abs(int a) - { - return ::abs(a); - } - __device__ __forceinline__ float _abs(float a) - { - return ::fabsf(a); - } - __device__ __forceinline__ double _abs(double a) - { - return ::fabs(a); - } - - template struct AbsDiffMat : binary_function - { - __device__ __forceinline__ T operator ()(T a, T b) const - { - return saturate_cast(_abs(a - b)); - } - - __device__ __forceinline__ AbsDiffMat() {} - __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::AbsDiffMat > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void absDiffMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream); - } - - void absDiffMat_v2(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream); - } - - template - void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AbsDiffMat(), WithOutMask(), stream); - } - - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// absDiffScalar - -namespace arithm -{ - template struct AbsDiffScalar : unary_function - { - S val; - - explicit AbsDiffScalar(S val_) : val(val_) {} - - __device__ __forceinline__ T operator ()(T a) const - { - abs_func f; - return saturate_cast(f(a - val)); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::AbsDiffScalar > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - AbsDiffScalar op(static_cast(val)); - - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// absMat - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< abs_func > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); - } - - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// sqrMat - -namespace arithm -{ - template struct Sqr : unary_function - { - __device__ __forceinline__ T operator ()(T x) const - { - return saturate_cast(x * x); - } - - __device__ __forceinline__ Sqr() {} - __device__ __forceinline__ Sqr(const Sqr& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::Sqr > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), WithOutMask(), stream); - } - - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// sqrtMat - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< sqrt_func > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); - } - - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// logMat - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< log_func > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); - } - - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// expMat - -namespace arithm -{ - template struct Exp : unary_function - { - __device__ __forceinline__ T operator ()(T x) const - { - exp_func f; - return saturate_cast(f(x)); - } - - __device__ __forceinline__ Exp() {} - __device__ __forceinline__ Exp(const Exp& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::Exp > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); - } - - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////////////////// -// cmpMat - -namespace arithm -{ - struct VCmpEq4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmpeq4(a, b); - } - - __device__ __forceinline__ VCmpEq4() {} - __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} - }; - struct VCmpNe4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmpne4(a, b); - } - - __device__ __forceinline__ VCmpNe4() {} - __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} - }; - struct VCmpLt4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmplt4(a, b); - } - - __device__ __forceinline__ VCmpLt4() {} - __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} - }; - struct VCmpLe4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmple4(a, b); - } - - __device__ __forceinline__ VCmpLe4() {} - __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} - }; - - //////////////////////////////////// - - template - struct Cmp : binary_function - { - __device__ __forceinline__ uchar operator()(T a, T b) const - { - Op op; - return -op(a, b); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::Cmp > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void cmpMatEq_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream); - } - void cmpMatNe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream); - } - void cmpMatLt_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream); - } - void cmpMatLe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream); - } - - template