added VideoWriter_GPU

2012-04-17 07:12:16 +00:00 · 2012-04-17 07:12:16 +00:00 · 2bfaf540a1
commit 2bfaf540a1
parent 0824cf5059
12 changed files with 1772 additions and 7 deletions
--- a/3rdparty/ffmpeg/opencv_ffmpeg.dll
+++ b/3rdparty/ffmpeg/opencv_ffmpeg.dll
--- a/3rdparty/ffmpeg/opencv_ffmpeg_64.dll
+++ b/3rdparty/ffmpeg/opencv_ffmpeg_64.dll
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@ -7,6 +7,8 @@ ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video o
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../highgui/src")
 file(GLOB lib_hdrs               "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
 file(GLOB lib_int_hdrs           "src/*.hpp" "src/*.h")
 file(GLOB lib_cuda_hdrs          "src/cuda/*.hpp" "src/cuda/*.h")
@ -48,7 +50,19 @@ if (HAVE_CUDA)
  OCV_CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
  #CUDA_BUILD_CLEAN_TARGET()
-  set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
+  unset(CUDA_nvcuvid_LIBRARY CACHE)  # clear the cached entry before looking the library up (presumably to force a fresh search)
  find_cuda_helper_libs(nvcuvid)
  # nvcuvenc is only looked up, and only linked, on Windows.
  if (WIN32)
    unset(CUDA_nvcuvenc_LIBRARY CACHE)
    find_cuda_helper_libs(nvcuvenc)
  endif()
  # Base link set: CUDA runtime, NPP and nvcuvid.
  set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY} ${CUDA_nvcuvid_LIBRARY})
  if (WIN32)
    # Append the encoder library found above.
    set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY})
  endif()
 else()
  set(lib_cuda "")
  set(cuda_objs "")
@ -61,7 +75,7 @@ ocv_set_module_sources(
  SOURCES ${lib_int_hdrs} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${lib_srcs} ${lib_cuda} ${ncv_files} ${cuda_objs}
  )
-ocv_create_module(${cuda_link_libs})
+ocv_create_module(${cuda_link_libs} ${HIGHGUI_LIBRARIES})
 if(HAVE_CUDA)
  if(HAVE_CUFFT)
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -45,6 +45,7 @@
 #ifndef SKIP_INCLUDES
 #include <vector>
 #include <memory>
 #endif
 #include "opencv2/core/gpumat.hpp"
@ -1884,6 +1885,100 @@ CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
 CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
 ////////////////////////////////// Video Encoding //////////////////////////////////////////
 // Works only under Windows
 // Supports only H264 video codec and AVI files
 // Video writer that encodes GpuMat frames with the NVIDIA encoder API (the NVVE_* parameters below).
 // The encoded bit stream is handed to an EncoderCallBack; the file-name overloads install a
 // callback that writes the stream to a file through FFMPEG.
 // The class is non-copyable (copy operations are declared private and left undefined).
 class CV_EXPORTS VideoWriter_GPU
 {
 public:
    struct EncoderParams;
    // Callbacks for video encoder, use it if you want to work with raw video stream
    class EncoderCallBack;
    // Creates a writer without opening any output.
    VideoWriter_GPU();
    // The remaining constructors forward their arguments to the matching open() overload.
    VideoWriter_GPU(const std::string& fileName, cv::Size frameSize, double fps);
    VideoWriter_GPU(const std::string& fileName, cv::Size frameSize, double fps, const EncoderParams& params);
    VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps);
    VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params);
    // Calls close().
    ~VideoWriter_GPU();
    // All methods throw cv::Exception if an error occurs
    // fileName overloads: output goes to the named file through an FFMPEG-backed callback.
    // encoderCallback overloads: output goes to the caller-supplied callback.
    // params: encoder settings; see EncoderParams.
    void open(const std::string& fileName, cv::Size frameSize, double fps);
    void open(const std::string& fileName, cv::Size frameSize, double fps, const EncoderParams& params);
    void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps);
    void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params);
    bool isOpened() const;
    void close();
    // The implementation requires image.size() to equal the frameSize given at open time and
    // image.type() to be CV_8UC1, CV_8UC3 or CV_8UC4. lastFrame is handed to the encoder as its
    // "last frame" flag (presumably to let it flush; confirm against the NVCUVENC documentation).
    void write(const cv::gpu::GpuMat& image, bool lastFrame = false);
    // Encoder settings. Each field is passed to the encoder under the NVVE_* parameter named
    // in the trailing comment.
    struct EncoderParams
    {
        int       P_Interval;      //    NVVE_P_INTERVAL,
        int       IDR_Period;      //    NVVE_IDR_PERIOD,
        int       DynamicGOP;      //    NVVE_DYNAMIC_GOP,
        int       RCType;          //    NVVE_RC_TYPE,
        int       AvgBitrate;      //    NVVE_AVG_BITRATE,
        int       PeakBitrate;     //    NVVE_PEAK_BITRATE,
        int       QP_Level_Intra;  //    NVVE_QP_LEVEL_INTRA,
        int       QP_Level_InterP; //    NVVE_QP_LEVEL_INTER_P,
        int       QP_Level_InterB; //    NVVE_QP_LEVEL_INTER_B,
        int       DeblockMode;     //    NVVE_DEBLOCK_MODE,
        int       ProfileLevel;    //    NVVE_PROFILE_LEVEL,
        int       ForceIntra;      //    NVVE_FORCE_INTRA,
        int       ForceIDR;        //    NVVE_FORCE_IDR,
        int       ClearStat;       //    NVVE_CLEAR_STAT,
        int       DIMode;          //    NVVE_SET_DEINTERLACE,
        int       Presets;         //    NVVE_PRESETS, (-1 means: do not send a preset to the encoder)
        int       DisableCabac;    //    NVVE_DISABLE_CABAC,
        int       NaluFramingType; //    NVVE_CONFIGURE_NALU_FRAMING_TYPE,
        int       DisableSPSPPS;   //    NVVE_DISABLE_SPS_PPS
        EncoderParams();
        // Constructs the parameters from a configuration file (presumably via load(); confirm in the implementation).
        explicit EncoderParams(const std::string& configFile);
        void load(const std::string& configFile);
        void save(const std::string& configFile) const;
    };
    // Interface through which the encoder hands out the encoded bit stream.
    class EncoderCallBack
    {
    public:
        // Picture type reported with each frame; the values are cast directly from the
        // encoder's picture-type field.
        enum PicType
        {
            IFRAME = 1,
            PFRAME = 2,
            BFRAME = 3
        };
        virtual ~EncoderCallBack() {}
        // callback function to signal the start of bitstream that is to be encoded
        // must return pointer to buffer
        // (the buffer capacity is reported through *bufferSize)
        virtual unsigned char* acquireBitStream(int* bufferSize) = 0;
        // callback function to signal that the encoded bitstream is ready to be written to file
        // (data is the buffer, size the number of bytes in it)
        virtual void releaseBitStream(unsigned char* data, int size) = 0;
        // callback function to signal that the encoding operation on the frame has started
        virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
        // callback function signals that the encoding operation on the frame has finished
        virtual void onEndFrame(int frameNumber, PicType picType) = 0;
    };
 private:
    // Copying is disabled: declared private and left undefined.
    VideoWriter_GPU(const VideoWriter_GPU&);
    VideoWriter_GPU& operator=(const VideoWriter_GPU&);
    // Implementation is hidden behind an opaque Impl defined in the source file.
    class Impl;
    std::auto_ptr<Impl> impl_;
 };
 } // namespace gpu
 } // namespace cv
--- a/modules/gpu/src/cuda/rgb_to_yv12.cu
+++ b/modules/gpu/src/cuda/rgb_to_yv12.cu
@ -0,0 +1,171 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/vec_traits.hpp"
 namespace cv { namespace gpu { namespace device 
 {
    namespace video_encoding
    {
        // Computes the luma of one pixel as the integer weighted sum
        // (30*r + 59*g + 11*b) / 100 and stores it in y.
        __device__ __forceinline__ void rgbtoy(const uchar b, const uchar g, const uchar r, uchar& y)
        {
            const int weighted = 30 * r + 59 * g + 11 * b;
            y = static_cast<uchar>(weighted / 100);
        }
        // Computes luma (via rgbtoy) and both chroma values of one pixel.
        // The chroma sums carry a +12800 bias, i.e. +128 after the division by 100.
        __device__ __forceinline__ void rgbtoyuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v)
        {
            rgbtoy(b, g, r, y);

            const int u_scaled = -17 * r - 33 * g + 50 * b + 12800;
            const int v_scaled =  50 * r - 42 * g -  8 * b + 12800;

            u = static_cast<uchar>(u_scaled / 100);
            v = static_cast<uchar>(v_scaled / 100);
        }
        // Converts a single-channel image into a planar Y/U/V layout stored in dst.
        // Each thread handles one 2x2 pixel quad: four luma samples and one U/V pair.
        __global__ void Gray_to_YV12(const DevMem2Db src, PtrStepb dst)
        {
            const int col = 2 * (blockIdx.x * blockDim.x + threadIdx.x);
            const int row = 2 * (blockIdx.y * blockDim.y + threadIdx.y);

            // Skip quads that would reach past the right or bottom edge.
            if (col + 1 >= src.cols || row + 1 >= src.rows)
                return;

            // The three planes are laid out back to back inside dst:
            // full-size luma, then two chroma planes with half the row pitch.
            const size_t lumaBytes = src.rows * dst.step;
            PtrStepb lumaPlane(dst.data, dst.step);
            PtrStepb uPlane(lumaPlane.data + lumaBytes, dst.step / 2);
            PtrStepb vPlane(uPlane.data + (lumaBytes / 4), dst.step / 2);

            uchar lumaVal, uVal, vVal;

            // First three pixels of the quad, in order (0,0), (0,1), (1,0): luma only.
            for (int k = 0; k < 3; ++k)
            {
                const int dy = k >> 1;
                const int dx = k & 1;
                const uchar gray = src(row + dy, col + dx);
                rgbtoy(gray, gray, gray, lumaVal);
                lumaPlane(row + dy, col + dx) = lumaVal;
            }

            // The bottom-right pixel additionally supplies the chroma of the whole quad.
            const uchar lastGray = src(row + 1, col + 1);
            rgbtoyuv(lastGray, lastGray, lastGray, lumaVal, uVal, vVal);
            lumaPlane(row + 1, col + 1) = lumaVal;
            uPlane(row / 2, col / 2) = uVal;
            vPlane(row / 2, col / 2) = vVal;
        }
        // Converts a multi-channel image (pixel type T with .x/.y/.z members) into a planar
        // Y/U/V layout stored in dst. Each thread handles one 2x2 pixel quad.
        // NOTE(review): the helpers take (b, g, r) but receive (pix.z, pix.y, pix.x); for data
        // stored in B,G,R order that feeds the third channel into the "b" slot - confirm the
        // intended channel order against the kernel name.
        template <typename T>
        __global__ void BGR_to_YV12(const DevMem2D_<T> src, PtrStepb dst)
        {
            const int col = 2 * (blockIdx.x * blockDim.x + threadIdx.x);
            const int row = 2 * (blockIdx.y * blockDim.y + threadIdx.y);

            // Skip quads that would reach past the right or bottom edge.
            if (col + 1 >= src.cols || row + 1 >= src.rows)
                return;

            // The three planes are laid out back to back inside dst:
            // full-size luma, then two chroma planes with half the row pitch.
            const size_t lumaBytes = src.rows * dst.step;
            PtrStepb lumaPlane(dst.data, dst.step);
            PtrStepb uPlane(lumaPlane.data + lumaBytes, dst.step / 2);
            PtrStepb vPlane(uPlane.data + (lumaBytes / 4), dst.step / 2);

            T px;
            uchar lumaVal, uVal, vVal;

            // First three pixels of the quad, in order (0,0), (0,1), (1,0): luma only.
            for (int k = 0; k < 3; ++k)
            {
                const int dy = k >> 1;
                const int dx = k & 1;
                px = src(row + dy, col + dx);
                rgbtoy(px.z, px.y, px.x, lumaVal);
                lumaPlane(row + dy, col + dx) = lumaVal;
            }

            // The bottom-right pixel additionally supplies the chroma of the whole quad.
            px = src(row + 1, col + 1);
            rgbtoyuv(px.z, px.y, px.x, lumaVal, uVal, vVal);
            lumaPlane(row + 1, col + 1) = lumaVal;
            uPlane(row / 2, col / 2) = uVal;
            vPlane(row / 2, col / 2) = vVal;
        }
        // Host launcher for Gray_to_YV12: one thread per 2x2 quad, 32x8 threads per block.
        // Checks the launch status and waits for the kernel to finish.
        void Gray_to_YV12_caller(const DevMem2Db src, PtrStepb dst)
        {
            const dim3 threads(32, 8);
            const dim3 blocks(divUp(src.cols, threads.x * 2), divUp(src.rows, threads.y * 2));

            Gray_to_YV12<<<blocks, threads>>>(src, dst);

            cudaSafeCall( cudaGetLastError() );
            cudaSafeCall( cudaDeviceSynchronize() );
        }
        // Host launcher for BGR_to_YV12 with cn channels per pixel: reinterprets the byte image
        // as an image of cn-component uchar vectors, launches one thread per 2x2 quad
        // (32x8 threads per block), checks the launch status and waits for completion.
        template <int cn>
        void BGR_to_YV12_caller(const DevMem2Db src, PtrStepb dst)
        {
            typedef typename TypeVec<uchar, cn>::vec_type pixel_t;

            const dim3 threads(32, 8);
            const dim3 blocks(divUp(src.cols, threads.x * 2), divUp(src.rows, threads.y * 2));

            const DevMem2D_<pixel_t> typedSrc = static_cast< DevMem2D_<pixel_t> >(src);
            BGR_to_YV12<<<blocks, threads>>>(typedSrc, dst);

            cudaSafeCall( cudaGetLastError() );
            cudaSafeCall( cudaDeviceSynchronize() );
        }
        // Entry point used by the video writer: picks the launcher that matches the channel
        // count cn and runs it. Only cn = 1, 3 and 4 have a launcher; the table has no entry
        // for any other value, so callers must validate cn first.
        void YV12_gpu(const DevMem2Db src, int cn, DevMem2Db dst)
        {
            typedef void (*launcher_t)(const DevMem2Db src, PtrStepb dst);

            // Indexed directly by channel count.
            static const launcher_t launchers[] =
            {
                0,                      // cn == 0: unused
                Gray_to_YV12_caller,    // cn == 1
                0,                      // cn == 2: unsupported
                BGR_to_YV12_caller<3>,  // cn == 3
                BGR_to_YV12_caller<4>   // cn == 4
            };

            const launcher_t launch = launchers[cn];
            launch(src, dst);
        }
    }
 }}}
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@ -71,16 +71,22 @@
 #ifdef HAVE_CUDA
-    #include "cuda.h"
+    #include <cuda.h>
-    #include "cuda_runtime_api.h"
+    #include <cuda_runtime.h>
-    #include "npp.h"
+    #include <npp.h>
    #ifdef HAVE_CUFFT
-        #include "cufft.h"
+        #include <cufft.h>
    #endif
    #ifdef HAVE_CUBLAS
-        #include "cublas.h"
+        #include <cublas.h>
    #endif
    #include <nvcuvid.h>
    #ifdef WIN32
        #include <NVEncoderAPI.h>
    #endif
    #include "internal_shared.hpp"
--- a/modules/gpu/src/video_writer.cpp
+++ b/modules/gpu/src/video_writer.cpp
@ -0,0 +1,724 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "precomp.hpp"
 #if !defined HAVE_CUDA || !defined WIN32
 // Stub implementation compiled when CUDA is unavailable or the target is not Windows.
 // Everything that would need the encoder reports the missing support through throw_nogpu();
 // the destructor, isOpened() and close() are harmless no-ops.

 // --- constructors / destructor ---
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU()
 {
    throw_nogpu();
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const std::string&, cv::Size, double)
 {
    throw_nogpu();
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const std::string&, cv::Size, double, const EncoderParams&)
 {
    throw_nogpu();
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>&, cv::Size, double)
 {
    throw_nogpu();
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>&, cv::Size, double, const EncoderParams&)
 {
    throw_nogpu();
 }
 cv::gpu::VideoWriter_GPU::~VideoWriter_GPU()
 {
 }

 // --- open / state / write ---
 void cv::gpu::VideoWriter_GPU::open(const std::string&, cv::Size, double)
 {
    throw_nogpu();
 }
 void cv::gpu::VideoWriter_GPU::open(const std::string&, cv::Size, double, const EncoderParams&)
 {
    throw_nogpu();
 }
 void cv::gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>&, cv::Size, double)
 {
    throw_nogpu();
 }
 void cv::gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>&, cv::Size, double, const EncoderParams&)
 {
    throw_nogpu();
 }
 bool cv::gpu::VideoWriter_GPU::isOpened() const
 {
    return false;
 }
 void cv::gpu::VideoWriter_GPU::close()
 {
 }
 void cv::gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat&, bool)
 {
    throw_nogpu();
 }

 // --- EncoderParams ---
 cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams()
 {
    throw_nogpu();
 }
 cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const std::string&)
 {
    throw_nogpu();
 }
 void cv::gpu::VideoWriter_GPU::EncoderParams::load(const std::string&)
 {
    throw_nogpu();
 }
 void cv::gpu::VideoWriter_GPU::EncoderParams::save(const std::string&) const
 {
    throw_nogpu();
 }
 #else // !defined HAVE_CUDA || !defined WIN32
 #ifdef HAVE_FFMPEG
    #ifdef NEW_FFMPEG
        #include "cap_ffmpeg_impl_v2.hpp"
    #else
        #include "cap_ffmpeg_impl.hpp"
    #endif
 #else
    #include "cap_ffmpeg_api.hpp"
 #endif
 ///////////////////////////////////////////////////////////////////////////
 // VideoWriter_GPU::Impl
 namespace
 {
    // Owns one NVEncoder handle: created in the constructor, destroyed in the destructor.
    // Converts implicitly to NVEncoder so it can be passed straight to the NV* API functions.
    // Non-copyable: a copy would share the handle and destroy it a second time.
    class NVEncoderWrapper
    {
    public:
        // Throws cv::Exception when the hardware-capability query reports an error or the
        // encoder cannot be created.
        NVEncoderWrapper() : encoder_(0)
        {
            int err;
            err = NVGetHWEncodeCaps();
            if (err)
                CV_Error(CV_GpuNotSupported, "No CUDA capability present");
            // Create the Encoder API Interface
            err = NVCreateEncoder(&encoder_);
            CV_Assert( err == 0 );
        }
        ~NVEncoderWrapper()
        {
            if (encoder_)
                NVDestroyEncoder(encoder_);
        }
        operator NVEncoder() const
        {
            return encoder_;
        }
    private:
        // Copying is disabled (declared, never defined) to prevent a double NVDestroyEncoder.
        NVEncoderWrapper(const NVEncoderWrapper&);
        NVEncoderWrapper& operator=(const NVEncoderWrapper&);
        NVEncoder encoder_;
    };
    // Codec selector. The numeric value is used as an index into the codec-id table in
    // Impl::initEncoder, so the values are spelled out explicitly.
    enum CodecType
    {
        MPEG1 = 0, // not supported yet
        MPEG2 = 1, // not supported yet
        MPEG4 = 2, // not supported yet
        H264  = 3
    };
 }
 // Private implementation of VideoWriter_GPU: configures the NVIDIA encoder, converts each
 // input frame to the encoder surface format on the GPU and forwards the encoder's callbacks
 // to the user-supplied EncoderCallBack.
 class cv::gpu::VideoWriter_GPU::Impl
 {
 public:
    // Both constructors fully set up the encoder; the second one additionally applies params
    // before the hardware encoder is created.
    Impl(const cv::Ptr<EncoderCallBack>& callback, cv::Size frameSize, double fps, CodecType codec = H264);
    Impl(const cv::Ptr<EncoderCallBack>& callback, cv::Size frameSize, double fps, const EncoderParams& params, CodecType codec = H264);
    // Encodes one frame; frame must match frameSize_ and be CV_8UC1, CV_8UC3 or CV_8UC4.
    void write(const cv::gpu::GpuMat& image, bool lastFrame);
 private:
    // Non-copyable: declared private and left undefined.
    Impl(const Impl&);
    Impl& operator=(const Impl&);
    // Set-up steps, called by the constructors in this order
    // (setEncodeParams only by the constructor that receives params).
    void initEncoder(double fps);
    void setEncodeParams(const EncoderParams& params);
    void initGpuMemory();
    void initCallBacks();
    void createHWEncoder();
    cv::Ptr<EncoderCallBack> callback_;   // receiver of the encoded bit stream
    cv::Size frameSize_;                  // size of every input frame
    CodecType codec_;
    NVVE_SurfaceFormat surfaceFormat_;    // both constructors set this to YV12
    NVEncoderWrapper encoder_;
    cv::gpu::GpuMat videoFrame_;          // device buffer holding the converted frame
    CUvideoctxlock cuCtxLock_;            // created in initGpuMemory; NOTE(review): no matching destroy call is visible here
    // CallBacks
    // Static trampolines registered with the encoder; pUserdata is the Impl instance.
    static unsigned char* NVENCAPI HandleAcquireBitStream(int* pBufferSize, void* pUserdata);
    static void NVENCAPI HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata);
    static void NVENCAPI HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata);
    static void NVENCAPI HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata);
 };
 // Builds an encoder that keeps the encoder's default tuning parameters.
 // callback receives the encoded stream; frameSize and fps describe the input video.
 // Throws cv::Exception if any set-up step fails.
 cv::gpu::VideoWriter_GPU::Impl::Impl(const cv::Ptr<EncoderCallBack>& callback, cv::Size frameSize, double fps, CodecType codec) :
    callback_(callback),
    frameSize_(frameSize),
    codec_(codec),
    surfaceFormat_(YV12),
    cuCtxLock_(0)
 {
    // Configure first; the hardware encoder is created last.
    initEncoder(fps);
    initGpuMemory();
    initCallBacks();
    createHWEncoder();
 }
 // Builds an encoder and applies the caller-supplied tuning parameters.
 // callback receives the encoded stream; frameSize and fps describe the input video.
 // Throws cv::Exception if any set-up step fails.
 cv::gpu::VideoWriter_GPU::Impl::Impl(const cv::Ptr<EncoderCallBack>& callback, cv::Size frameSize, double fps, const EncoderParams& params, CodecType codec) :
    callback_(callback),
    frameSize_(frameSize),
    codec_(codec),
    surfaceFormat_(YV12),
    cuCtxLock_(0)
 {
    initEncoder(fps);
    // Applied after initEncoder so that params override the defaults it installs.
    setEncodeParams(params);
    initGpuMemory();
    initCallBacks();
    createHWEncoder();
 }
 // Applies the basic encoder configuration: codec, default parameters, input/output size,
 // aspect ratio, frame rate and the GPU to encode on.
 // fps is converted to a rational frame_rate / frame_rate_base accurate to 0.001.
 // Throws cv::Exception if the codec is rejected or any parameter cannot be set.
 void cv::gpu::VideoWriter_GPU::Impl::initEncoder(double fps)
 {
    int err;

    // Set codec. The table is indexed by CodecType, so its order must match that enum.
    static const unsigned long codecs_id[] =
    {
        NV_CODEC_TYPE_MPEG1, NV_CODEC_TYPE_MPEG2, NV_CODEC_TYPE_MPEG4, NV_CODEC_TYPE_H264, NV_CODEC_TYPE_VC1
    };
    err = NVSetCodec(encoder_, codecs_id[codec_]);
    if (err)
        CV_Error(CV_StsNotImplemented, "Codec format is not supported");

    // Set default params
    err = NVSetDefaultParam(encoder_);
    CV_Assert( err == 0 );

    // Set some common params; output size equals input size (no scaling).
    int inputSize[] = { frameSize_.width, frameSize_.height };
    err = NVSetParamValue(encoder_, NVVE_IN_SIZE, &inputSize);
    CV_Assert( err == 0 );
    err = NVSetParamValue(encoder_, NVVE_OUT_SIZE, &inputSize);
    CV_Assert( err == 0 );

    // NOTE(review): the display aspect ratio is fixed at 16:9 regardless of frameSize_.
    //int aspectRatio[] = { frameSize_.width, frameSize_.height, ASPECT_RATIO_DAR };
    int aspectRatio[] = { 16, 9, ASPECT_RATIO_DAR };
    err = NVSetParamValue(encoder_, NVVE_ASPECT_RATIO, &aspectRatio);
    CV_Assert( err == 0 );

    // FPS: start from the nearest integer rate and multiply the base by 10 until the
    // rational is within 0.001 of fps. The absolute value must cover the whole difference;
    // otherwise a rate rounded below fps (e.g. 24.4 -> 24/1) would be accepted unrefined.
    int frame_rate = static_cast<int>(fps + 0.5);
    int frame_rate_base = 1;
    while (fabs(static_cast<double>(frame_rate) / frame_rate_base - fps) > 0.001)
    {
        frame_rate_base *= 10;
        frame_rate = static_cast<int>(fps*frame_rate_base + 0.5);
    }
    int FrameRate[] = { frame_rate, frame_rate_base };
    err = NVSetParamValue(encoder_, NVVE_FRAME_RATE, &FrameRate);
    CV_Assert( err == 0 );

    // Select device for encoding: the device currently active for the gpu module.
    int gpuID = cv::gpu::getDevice();
    err = NVSetParamValue(encoder_, NVVE_FORCE_GPU_SELECTION, &gpuID);
    CV_Assert( err == 0 );
 }
 // Pushes every field of params to the encoder through NVSetParamValue, one NVVE_* parameter
 // per field. Each value is first copied into a local of the type handed to the API
 // (int, or the matching NVVE enum). Throws cv::Exception as soon as one call reports an error.
 void cv::gpu::VideoWriter_GPU::Impl::setEncodeParams(const EncoderParams& params)
 {
    int err;
    int P_Interval = params.P_Interval;
    err = NVSetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval);
    CV_Assert( err == 0 );
    int IDR_Period = params.IDR_Period;
    err = NVSetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period);
    CV_Assert( err == 0 );
    int DynamicGOP = params.DynamicGOP;
    err = NVSetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP);
    CV_Assert( err == 0 );
    // Rate-control type is stored as int in EncoderParams and cast to the API enum here.
    NVVE_RateCtrlType RCType = static_cast<NVVE_RateCtrlType>(params.RCType);
    err = NVSetParamValue(encoder_, NVVE_RC_TYPE, &RCType);
    CV_Assert( err == 0 );
    int AvgBitrate = params.AvgBitrate;
    err = NVSetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate);
    CV_Assert( err == 0 );
    int PeakBitrate = params.PeakBitrate;
    err = NVSetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate);
    CV_Assert( err == 0 );
    int QP_Level_Intra = params.QP_Level_Intra;
    err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra);
    CV_Assert( err == 0 );
    int QP_Level_InterP = params.QP_Level_InterP;
    err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP);
    CV_Assert( err == 0 );
    int QP_Level_InterB = params.QP_Level_InterB;
    err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB);
    CV_Assert( err == 0 );
    int DeblockMode = params.DeblockMode;
    err = NVSetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode);
    CV_Assert( err == 0 );
    int ProfileLevel = params.ProfileLevel;
    err = NVSetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel);
    CV_Assert( err == 0 );
    int ForceIntra = params.ForceIntra;
    err = NVSetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra);
    CV_Assert( err == 0 );
    int ForceIDR = params.ForceIDR;
    err = NVSetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR);
    CV_Assert( err == 0 );
    int ClearStat = params.ClearStat;
    err = NVSetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat);
    CV_Assert( err == 0 );
    // Deinterlace mode is stored as int in EncoderParams and cast to the API enum here.
    NVVE_DI_MODE DIMode = static_cast<NVVE_DI_MODE>(params.DIMode);
    err = NVSetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode);
    CV_Assert( err == 0 );
    // A preset of -1 means "none": the NVVE_PRESETS parameter is then left untouched.
    if (params.Presets != -1)
    {
        NVVE_PRESETS_TARGET Presets = static_cast<NVVE_PRESETS_TARGET>(params.Presets);
        err = NVSetParamValue(encoder_, NVVE_PRESETS, &Presets);
        CV_Assert ( err == 0 );
    }
    int DisableCabac = params.DisableCabac;
    err = NVSetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac);
    CV_Assert ( err == 0 );
    int NaluFramingType = params.NaluFramingType;
    err = NVSetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType);
    CV_Assert ( err == 0 );
    int DisableSPSPPS = params.DisableSPSPPS;
    err = NVSetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS);
    CV_Assert ( err == 0 );
 }
 // Allocates the device buffer that holds one frame in the encoder surface format, creates
 // the video context lock for the current CUDA context and tells the encoder to read its
 // input from device memory under that lock.
 // Throws cv::Exception if a CUDA or encoder call fails.
 void cv::gpu::VideoWriter_GPU::Impl::initGpuMemory()
 {
    int err;
    CUresult cuRes;
    // initialize context
    // (a throw-away allocation, presumably to make sure a CUDA context is current before
    // cuCtxGetCurrent is queried below)
    cv::gpu::GpuMat temp(1, 1, CV_8U);
    temp.release();
    // Bits per pixel, indexed by the NVVE_SurfaceFormat value.
    static const int bpp[] =
    {
        16, // UYVY, 4:2:2
        16, // YUY2, 4:2:2
        12, // YV12, 4:2:0
        12, // NV12, 4:2:0
        12, // IYUV, 4:2:0
    };
    CUcontext cuContext;
    cuRes = cuCtxGetCurrent(&cuContext);
    CV_Assert( cuRes == CUDA_SUCCESS );
    // Allocate the CUDA memory Pitched Surface
    // UYVY/YUY2: the extra bytes widen each row. All other formats: the extra bytes are
    // added as additional rows below the luma plane.
    if (surfaceFormat_ == UYVY || surfaceFormat_ == YUY2)
        videoFrame_.create(frameSize_.height, (frameSize_.width * bpp[surfaceFormat_]) / 8, CV_8UC1);
    else
        videoFrame_.create((frameSize_.height * bpp[surfaceFormat_]) / 8, frameSize_.width, CV_8UC1);
    // Create the Video Context Lock (used for synchronization)
    cuRes = cuvidCtxLockCreate(&cuCtxLock_, cuContext);
    CV_Assert( cuRes == CUDA_SUCCESS );
    // If we are using GPU Device Memory with NVCUVENC, it is necessary to create a
    // CUDA Context with a Context Lock cuvidCtxLock.  The Context Lock needs to be passed to NVCUVENC
    int iUseDeviceMem = 1;
    err = NVSetParamValue(encoder_, NVVE_DEVICE_MEMORY_INPUT, &iUseDeviceMem);
    CV_Assert ( err == 0 );
    err = NVSetParamValue(encoder_, NVVE_DEVICE_CTX_LOCK, &cuCtxLock_);
    CV_Assert ( err == 0 );
 }
 void cv::gpu::VideoWriter_GPU::Impl::initCallBacks()
 {
    NVVE_CallbackParams cb;
    memset(&cb, 0, sizeof(NVVE_CallbackParams));
    cb.pfnacquirebitstream = HandleAcquireBitStream;
    cb.pfnonbeginframe     = HandleOnBeginFrame;
    cb.pfnonendframe       = HandleOnEndFrame;
    cb.pfnreleasebitstream = HandleReleaseBitStream;
    NVRegisterCB(encoder_, cb, this);
 }
 // Final set-up step: asks the encoder API to allocate its hardware resources.
 // Throws cv::Exception if the call reports an error.
 void cv::gpu::VideoWriter_GPU::Impl::createHWEncoder()
 {
    const int err = NVCreateHWEncoder(encoder_);
    CV_Assert( err == 0 );
 }
 // Forward declaration of the colour-conversion entry point called from Impl::write below.
 namespace cv { namespace gpu { namespace device
 {
    namespace video_encoding
    {
        // Converts src (cn channels per pixel) into the planar surface stored in dst.
        void YV12_gpu(const DevMem2Db src, int cn, DevMem2Db dst);
    }
 }}}
 // Encodes one frame.
 // frame must have the size given at construction and type CV_8UC1, CV_8UC3 or CV_8UC4.
 // lastFrame is forwarded to the encoder as the bLast flag.
 // The frame is converted into videoFrame_ on the GPU while the context lock is held, then
 // handed to the encoder. Throws cv::Exception on any failure.
 void cv::gpu::VideoWriter_GPU::Impl::write(const cv::gpu::GpuMat& frame, bool lastFrame)
 {
    CV_Assert( frame.size() == frameSize_ );
    CV_Assert( frame.type() == CV_8UC1 || frame.type() == CV_8UC3 || frame.type() == CV_8UC4 );

    NVVE_EncodeFrameParams efparams;
    // Zero the structure first so that no field the code below leaves unset reaches the
    // encoder with an indeterminate value.
    memset(&efparams, 0, sizeof(efparams));
    efparams.Width = frameSize_.width;
    efparams.Height = frameSize_.height;
    efparams.Pitch = static_cast<int>(videoFrame_.step);
    efparams.SurfFmt = surfaceFormat_;
    efparams.PictureStruc = FRAME_PICTURE;
    efparams.topfieldfirst =  0;
    efparams.repeatFirstField = 0;
    efparams.progressiveFrame = (surfaceFormat_ == NV12) ? 1 : 0;
    efparams.bLast = lastFrame;
    efparams.picBuf = 0; // Must be set to NULL in order to support device memory input

    // Don't forget we need to lock/unlock between memcopies
    CUresult res = cuvidCtxLock(cuCtxLock_, 0);
    CV_Assert( res == CUDA_SUCCESS );

    try
    {
        if (surfaceFormat_ == YV12)
            cv::gpu::device::video_encoding::YV12_gpu(frame, frame.channels(), videoFrame_);
    }
    catch (...)
    {
        // The conversion may throw; release the lock before propagating so that a failed
        // frame does not leave the context locked.
        cuvidCtxUnlock(cuCtxLock_, 0);
        throw;
    }

    res = cuvidCtxUnlock(cuCtxLock_, 0);
    CV_Assert( res == CUDA_SUCCESS );

    int err = NVEncodeFrame(encoder_, &efparams, 0, videoFrame_.data);
    CV_Assert( err == 0 );
 }
 // Encoder trampoline: asks the user callback for an output buffer.
 // pUserdata is the Impl registered in initCallBacks; the buffer capacity is returned
 // through pBufferSize.
 unsigned char* NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleAcquireBitStream(int* pBufferSize, void* pUserdata)
 {
    Impl* const self = static_cast<Impl*>(pUserdata);
    unsigned char* const buffer = self->callback_->acquireBitStream(pBufferSize);
    return buffer;
 }
 // Encoder trampoline: hands nBytesInBuffer encoded bytes starting at cb to the user
 // callback. Note the argument order swap: the user callback takes (data, size).
 void NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata)
 {
    Impl* const self = static_cast<Impl*>(pUserdata);
    self->callback_->releaseBitStream(cb, nBytesInBuffer);
 }
 // Encoder trampoline: reports the start of encoding for a frame to the user callback,
 // converting the encoder's picture type to EncoderCallBack::PicType.
 void NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata)
 {
    Impl* const self = static_cast<Impl*>(pUserdata);
    const EncoderCallBack::PicType picType = static_cast<EncoderCallBack::PicType>(pbfi->nPicType);
    self->callback_->onBeginFrame(pbfi->nFrameNumber, picType);
 }
 // Encoder trampoline: reports the end of encoding for a frame to the user callback,
 // converting the encoder's picture type to EncoderCallBack::PicType.
 void NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata)
 {
    Impl* const self = static_cast<Impl*>(pUserdata);
    const EncoderCallBack::PicType picType = static_cast<EncoderCallBack::PicType>(pefi->nPicType);
    self->callback_->onEndFrame(pefi->nFrameNumber, picType);
 }
 ///////////////////////////////////////////////////////////////////////////
 // FFMPEG
 // EncoderCallBack that writes the encoded bit stream to a file through the FFMPEG output
 // media stream functions. Used by the VideoWriter_GPU overloads that take a file name.
 class EncoderCallBackFFMPEG : public cv::gpu::VideoWriter_GPU::EncoderCallBack
 {
 public:
    // Opens the output stream for fileName; throws cv::Exception if the FFMPEG functions
    // are unavailable or the stream cannot be created.
    EncoderCallBackFFMPEG(const std::string& fileName, cv::Size frameSize, double fps);
    // Releases the output stream.
    ~EncoderCallBackFFMPEG();
    // Returns the internal buffer and reports its size through bufferSize.
    unsigned char* acquireBitStream(int* bufferSize);
    // Writes size bytes from data to the output stream.
    void releaseBitStream(unsigned char* data, int size);
    // Frame notifications are ignored by this implementation.
    void onBeginFrame(int frameNumber, PicType picType);
    void onEndFrame(int frameNumber, PicType picType);
 private:
    // Non-copyable: declared private and left undefined.
    EncoderCallBackFFMPEG(const EncoderCallBackFFMPEG&);
    EncoderCallBackFFMPEG& operator=(const EncoderCallBackFFMPEG&);
    struct OutputMediaStream_FFMPEG* stream_; // output stream handle
    std::vector<uchar> buf_;                  // buffer the encoder writes the bit stream into
 };
 namespace
 {
    // Entry points of the FFMPEG output-stream functions. They stay null until
    // init_MediaStream_FFMPEG() resolves them.
    Create_OutputMediaStream_FFMPEG_Plugin create_OutputMediaStream_FFMPEG_p = 0;
    Release_OutputMediaStream_FFMPEG_Plugin release_OutputMediaStream_FFMPEG_p = 0;
    Write_OutputMediaStream_FFMPEG_Plugin write_OutputMediaStream_FFMPEG_p = 0;
    // Resolves the three FFMPEG output-stream entry points, once.
    // On Windows they are looked up in opencv_ffmpeg.dll (opencv_ffmpeg_64.dll on 64-bit
    // builds); elsewhere, when HAVE_FFMPEG is defined, the compiled-in functions are used.
    // Returns true when all three entry points are available. A failed attempt is not
    // cached, so later calls try the lookup again.
    bool init_MediaStream_FFMPEG()
    {
        static bool initialized = false;
        if (!initialized)
        {
        #if defined WIN32 || defined _WIN32
            const char* module_name = "opencv_ffmpeg"
            #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
                "_64"
            #endif
                ".dll";
            // module_name is a narrow string, so call the ANSI entry point explicitly; the
            // LoadLibrary macro maps to LoadLibraryW in UNICODE builds and would not accept it.
            static HMODULE cvFFOpenCV = LoadLibraryA(module_name);
            if (cvFFOpenCV)
            {
                create_OutputMediaStream_FFMPEG_p =
                    (Create_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "create_OutputMediaStream_FFMPEG");
                release_OutputMediaStream_FFMPEG_p =
                    (Release_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "release_OutputMediaStream_FFMPEG");
                write_OutputMediaStream_FFMPEG_p =
                    (Write_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "write_OutputMediaStream_FFMPEG");
                // Only report success when every entry point was found.
                initialized = create_OutputMediaStream_FFMPEG_p != 0 && release_OutputMediaStream_FFMPEG_p != 0 && write_OutputMediaStream_FFMPEG_p != 0;
            }
        #elif defined HAVE_FFMPEG
            create_OutputMediaStream_FFMPEG_p = create_OutputMediaStream_FFMPEG;
            release_OutputMediaStream_FFMPEG_p = release_OutputMediaStream_FFMPEG;
            write_OutputMediaStream_FFMPEG_p = write_OutputMediaStream_FFMPEG;
            initialized = true;
        #endif
        }
        return initialized;
    }
 }
 // Allocates the bit-stream buffer (4 bytes per pixel, but at least 1 MiB), makes sure the
 // FFMPEG entry points are resolved and opens the output stream for fileName.
 // Throws cv::Exception if the entry points are missing or the stream cannot be created.
 EncoderCallBackFFMPEG::EncoderCallBackFFMPEG(const std::string& fileName, cv::Size frameSize, double fps) :
    stream_(0),
    buf_(static_cast<size_t>(std::max(frameSize.area() * 4, 1024 * 1024)))
 {
    CV_Assert( init_MediaStream_FFMPEG() );

    const char* const path = fileName.c_str();
    stream_ = create_OutputMediaStream_FFMPEG_p(path, frameSize.width, frameSize.height, fps);
    CV_Assert( stream_ != 0 );
 }
 EncoderCallBackFFMPEG::~EncoderCallBackFFMPEG()
 {
    release_OutputMediaStream_FFMPEG_p(stream_);
 }
 // Exposes the internal buffer: returns its first byte and stores its size in *bufferSize.
 unsigned char* EncoderCallBackFFMPEG::acquireBitStream(int* bufferSize)
 {
    unsigned char* const bufferStart = &buf_[0];
    *bufferSize = static_cast<int>(buf_.size());
    return bufferStart;
 }
 // Forwards `size` bytes starting at `data` to the FFMPEG output stream.
 void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size)
 {
    write_OutputMediaStream_FFMPEG_p(stream_, data, size);
 }
 // Frame-begin notification; intentionally a no-op.
 void EncoderCallBackFFMPEG::onBeginFrame(int /*frameNumber*/, PicType /*picType*/)
 {
 }
 // Frame-end notification; intentionally a no-op.
 void EncoderCallBackFFMPEG::onEndFrame(int /*frameNumber*/, PicType /*picType*/)
 {
 }
 ///////////////////////////////////////////////////////////////////////////
 // VideoWriter_GPU
 // Creates a writer that is not attached to any output yet.
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU()
 {
 }
 // Each of the following constructors forwards to the open() overload
 // taking the same arguments.
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const std::string& fileName, cv::Size frameSize, double fps)
 {
    this->open(fileName, frameSize, fps);
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const std::string& fileName, cv::Size frameSize, double fps, const EncoderParams& params)
 {
    this->open(fileName, frameSize, fps, params);
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps)
 {
    this->open(encoderCallback, frameSize, fps);
 }
 cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params)
 {
    this->open(encoderCallback, frameSize, fps, params);
 }
 // Closes the writer on destruction.
 cv::gpu::VideoWriter_GPU::~VideoWriter_GPU()
 {
    this->close();
 }
 void cv::gpu::VideoWriter_GPU::open(const std::string& fileName, cv::Size frameSize, double fps)
 {
    close();
    cv::Ptr<EncoderCallBack> encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps));
    open(encoderCallback, frameSize, fps);
 }
 void cv::gpu::VideoWriter_GPU::open(const std::string& fileName, cv::Size frameSize, double fps, const EncoderParams& params)
 {
    close();
    cv::Ptr<EncoderCallBack> encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps));
    open(encoderCallback, frameSize, fps, params);
 }
 void cv::gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps)
 {
    close();
    impl_.reset(new Impl(encoderCallback, frameSize, fps));
 }
 void cv::gpu::VideoWriter_GPU::open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params)
 {
    close();
    impl_.reset(new Impl(encoderCallback, frameSize, fps, params));
 }
 // True while the writer holds an implementation object.
 bool cv::gpu::VideoWriter_GPU::isOpened() const
 {
    const bool hasImpl = (impl_.get() != 0);
    return hasImpl;
 }
 // Drops the implementation object, if any.
 void cv::gpu::VideoWriter_GPU::close()
 {
    impl_.reset();
 }
 // Passes one frame to the implementation; asserts that the writer is open.
 void cv::gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame)
 {
    CV_Assert( this->isOpened() );
    (*impl_).write(image, lastFrame);
 }
 ///////////////////////////////////////////////////////////////////////////
 // VideoWriter_GPU::EncoderParams
 // Default encoder settings. load() passes the same numbers to cv::read as defaults.
 cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams() :
    P_Interval(3),
    IDR_Period(15),
    DynamicGOP(0),
    RCType(1),
    AvgBitrate(4000000),
    PeakBitrate(10000000),
    QP_Level_Intra(25),
    QP_Level_InterP(28),
    QP_Level_InterB(31),
    DeblockMode(1),
    ProfileLevel(65357),
    ForceIntra(0),
    ForceIDR(0),
    ClearStat(0),
    DIMode(1),
    Presets(2),
    DisableCabac(0),
    NaluFramingType(0),
    DisableSPSPPS(0)
 {
 }
 // Builds the settings from a configuration file via load().
 cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const std::string& configFile)
 {
    this->load(configFile);
 }
 // Reads every encoder setting from configFile; asserts that the file can be opened.
 void cv::gpu::VideoWriter_GPU::EncoderParams::load(const std::string& configFile)
 {
    cv::FileStorage fs(configFile, cv::FileStorage::READ);
    CV_Assert( fs.isOpened() );
    // One row per setting: storage key, destination field, default handed to cv::read.
    const struct { const char* key; int* field; int fallback; } settings[] =
    {
        { "P_Interval",      &P_Interval,      3        },
        { "IDR_Period",      &IDR_Period,      15       },
        { "DynamicGOP",      &DynamicGOP,      0        },
        { "RCType",          &RCType,          1        },
        { "AvgBitrate",      &AvgBitrate,      4000000  },
        { "PeakBitrate",     &PeakBitrate,     10000000 },
        { "QP_Level_Intra",  &QP_Level_Intra,  25       },
        { "QP_Level_InterP", &QP_Level_InterP, 28       },
        { "QP_Level_InterB", &QP_Level_InterB, 31       },
        { "DeblockMode",     &DeblockMode,     1        },
        { "ProfileLevel",    &ProfileLevel,    65357    },
        { "ForceIntra",      &ForceIntra,      0        },
        { "ForceIDR",        &ForceIDR,        0        },
        { "ClearStat",       &ClearStat,       0        },
        { "DIMode",          &DIMode,          1        },
        { "Presets",         &Presets,         2        },
        { "DisableCabac",    &DisableCabac,    0        },
        { "NaluFramingType", &NaluFramingType, 0        },
        { "DisableSPSPPS",   &DisableSPSPPS,   0        }
    };
    const size_t settingCount = sizeof(settings) / sizeof(settings[0]);
    for (size_t i = 0; i < settingCount; ++i)
        cv::read(fs[settings[i].key], *settings[i].field, settings[i].fallback);
 }
 // Writes every encoder setting to configFile; asserts that the file can be opened.
 void cv::gpu::VideoWriter_GPU::EncoderParams::save(const std::string& configFile) const
 {
    cv::FileStorage fs(configFile, cv::FileStorage::WRITE);
    CV_Assert( fs.isOpened() );
    // One row per setting, in the order they are written: storage key and current value.
    const struct { const char* key; int value; } settings[] =
    {
        { "P_Interval",      P_Interval      },
        { "IDR_Period",      IDR_Period      },
        { "DynamicGOP",      DynamicGOP      },
        { "RCType",          RCType          },
        { "AvgBitrate",      AvgBitrate      },
        { "PeakBitrate",     PeakBitrate     },
        { "QP_Level_Intra",  QP_Level_Intra  },
        { "QP_Level_InterP", QP_Level_InterP },
        { "QP_Level_InterB", QP_Level_InterB },
        { "DeblockMode",     DeblockMode     },
        { "ProfileLevel",    ProfileLevel    },
        { "ForceIntra",      ForceIntra      },
        { "ForceIDR",        ForceIDR        },
        { "ClearStat",       ClearStat       },
        { "DIMode",          DIMode          },
        { "Presets",         Presets         },
        { "DisableCabac",    DisableCabac    },
        { "NaluFramingType", NaluFramingType },
        { "DisableSPSPPS",   DisableSPSPPS   }
    };
    const size_t settingCount = sizeof(settings) / sizeof(settings[0]);
    for (size_t i = 0; i < settingCount; ++i)
        cv::write(fs, settings[i].key, settings[i].value);
 }
 #endif // !defined HAVE_CUDA || !defined WIN32
--- a/modules/gpu/test/test_video.cpp
+++ b/modules/gpu/test/test_video.cpp
@ -384,4 +384,66 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, FarnebackOpticalFlow, testing::Combine(
    testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
    testing::Values(UseInitFlow(false), UseInitFlow(true))));
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // VideoWriter
 // Fixture for the GPU video writer tests, parameterized by CUDA device and
 // input video name (relative to the "video/" folder of the test data path).
 PARAM_TEST_CASE(VideoWriter, cv::gpu::DeviceInfo, std::string)
 {
    cv::gpu::DeviceInfo devInfo;
    std::string inputFile;
    std::string outputFile;
    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        inputFile = GET_PARAM(1);
        cv::gpu::setDevice(devInfo.deviceID());
        inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + inputFile;
        // Derive the output name by replacing the file extension only. A dot that
        // belongs to a directory of the data path must not truncate the path.
        const std::string::size_type lastSeparator = inputFile.find_last_of("/\\");
        const std::string::size_type lastDot = inputFile.rfind('.');
        const bool hasExtension = lastDot != std::string::npos &&
            (lastSeparator == std::string::npos || lastDot > lastSeparator);
        outputFile = (hasExtension ? inputFile.substr(0, lastDot) : inputFile) + "_test.avi";
    }
 };
 // Encodes up to nine frames of the input video with VideoWriter_GPU and checks
 // that the resulting file can be opened by cv::VideoCapture.
 TEST_P(VideoWriter, Regression)
 {
    const double FPS = 25.0;
    cv::VideoCapture reader(inputFile);
    ASSERT_TRUE( reader.isOpened() );
    cv::gpu::VideoWriter_GPU d_writer;
    cv::Mat frame;
    cv::gpu::GpuMat d_frame;
    for (int i = 1; i < 10; ++i)
    {
        reader >> frame;
        if (frame.empty())
            break;
        d_frame.upload(frame);
        // The writer is opened lazily because it needs the frame size.
        if (!d_writer.isOpened())
            d_writer.open(outputFile, frame.size(), FPS);
        d_writer.write(d_frame);
    }
    reader.release();
    d_writer.close();
    reader.open(outputFile);
    ASSERT_TRUE( reader.isOpened() );
 }
 // Instantiates the VideoWriter tests for every combination of ALL_DEVICES and
 // the two sample videos listed below.
 INSTANTIATE_TEST_CASE_P(GPU_Video, VideoWriter, testing::Combine(
    ALL_DEVICES,
    testing::Values("VID00003-20100701-2204.3GP", "big_buck_bunny.mpg")));
 } // namespace
--- a/modules/highgui/src/cap_ffmpeg_api.hpp
+++ b/modules/highgui/src/cap_ffmpeg_api.hpp
@ -65,6 +65,18 @@ typedef int (*CvWriteFrame_Plugin)( void* writer_handle, const unsigned char* da
                                    int width, int height, int cn, int origin);
 typedef void (*CvReleaseVideoWriter_Plugin)( void** writer );
 /*
 * For CUDA encoder
 *
 * Output stream that stores data produced by the CUDA encoder through FFMPEG.
 * The *_Plugin typedefs further down mirror these signatures so the functions
 * can also be called through function pointers.
 */
 /* Opens an output stream for fileName; returns 0 when it cannot be opened. */
 OPENCV_FFMPEG_API struct OutputMediaStream_FFMPEG* create_OutputMediaStream_FFMPEG(const char* fileName, int width, int height, double fps);
 /* Closes the stream and frees the handle. */
 OPENCV_FFMPEG_API void release_OutputMediaStream_FFMPEG(struct OutputMediaStream_FFMPEG* stream);
 /* Writes `size` bytes starting at `data` to the stream. */
 OPENCV_FFMPEG_API void write_OutputMediaStream_FFMPEG(struct OutputMediaStream_FFMPEG* stream, unsigned char* data, int size);
 /* Function-pointer types matching the three functions above. */
 typedef struct OutputMediaStream_FFMPEG* (*Create_OutputMediaStream_FFMPEG_Plugin)(const char* fileName, int width, int height, double fps);
 typedef void (*Release_OutputMediaStream_FFMPEG_Plugin)(struct OutputMediaStream_FFMPEG* stream);
 typedef void (*Write_OutputMediaStream_FFMPEG_Plugin)(struct OutputMediaStream_FFMPEG* stream, unsigned char* data, int size);
 #ifdef __cplusplus
 }
 #endif
--- a/modules/highgui/src/cap_ffmpeg_impl.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl.hpp
@ -1446,3 +1446,295 @@ void CvVideoWriter_FFMPEG::close()
        return writer->writeFrame(data, step, width, height, cn, origin);
    }
 /*
 * For CUDA encoder
 */
 // Output stream used by the CUDA encoder path: opens a container file and
 // writes the data chunks handed to write() into it.
 struct OutputMediaStream_FFMPEG
 {
    // State set up by open(); all three are reset to 0 at the start of open().
    AVOutputFormat* fmt_;
    AVFormatContext* oc_;
    AVStream* video_st_;
    // add a video output stream to the container
    static AVStream* addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format);
    // Opens fileName for writing; returns false on failure.
    bool open(const char* fileName, int width, int height, double fps);
    // Writes `size` bytes starting at `data` as one packet.
    void write(unsigned char* data, int size);
    // Finishes the file and frees what open() allocated.
    void close();
 };
 void OutputMediaStream_FFMPEG::close()
 {
    // no more frame to compress. The codec has a latency of a few
    // frames if using B frames, so we get the last frames by
    // passing the same picture again
    // TODO -- do we need to account for latency here?
    if (oc_)
    {
        // write the trailer, if any
        av_write_trailer(oc_);
        // free the streams
        for (unsigned int i = 0; i < oc_->nb_streams; ++i)
        {
            av_freep(&oc_->streams[i]->codec);
            av_freep(&oc_->streams[i]);
        }
        if (!(fmt_->flags & AVFMT_NOFILE) && oc_->pb)
        {
            // close the output file
            #if LIBAVCODEC_VERSION_INT < ((52<<16)+(123<<8)+0)
                #if LIBAVCODEC_VERSION_INT >= ((51<<16)+(49<<8)+0)
                    url_fclose(oc_->pb);
                #else
                    url_fclose(&oc_->pb);
                #endif
            #else
                avio_close(oc_->pb);
            #endif
        }
        // free the stream
        av_free(oc_);
    }
 }
 // Adds one video stream to `oc` and fills in its codec context (codec id,
 // bitrate, frame size, time base, GOP size, pixel format).
 // Returns the new stream, or 0 when the stream cannot be created.
 AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format)
 {
    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 10, 0)
        AVStream* st = avformat_new_stream(oc, 0);
    #else
        AVStream* st = av_new_stream(oc, 0);
    #endif
    if (!st)
        return 0;
    #if LIBAVFORMAT_BUILD > 4628
        AVCodecContext* c = st->codec;
    #else
        AVCodecContext* c = &(st->codec);
    #endif
    c->codec_id = codec_id;
    c->codec_type = AVMEDIA_TYPE_VIDEO;
    // put sample parameters
    c->bit_rate = bitrate;
    // took advice from
    // http://ffmpeg-users.933282.n4.nabble.com/warning-clipping-1-dct-coefficients-to-127-127-td934297.html
    c->qmin = 3;
    // resolution must be a multiple of two
    c->width = w;
    c->height = h;
    AVCodec* codec = avcodec_find_encoder(c->codec_id);
    // time base: this is the fundamental unit of time (in seconds) in terms
    // of which frame timestamps are represented. for fixed-fps content,
    // timebase should be 1/framerate and timestamp increments should be
    // identically 1
    //
    // Approximate fps by frame_rate / frame_rate_base, adding decimal digits
    // until the error is at most 0.001. The absolute value must cover the whole
    // difference, otherwise rates that round down (e.g. 24.4) are never refined.
    // At base 1000 the rounding error is already below the threshold, so the
    // base is capped there; the cap also stops the loop for non-finite fps.
    const int max_frame_rate_base = 1000;
    int frame_rate = static_cast<int>(fps+0.5);
    int frame_rate_base = 1;
    while (frame_rate_base < max_frame_rate_base &&
           fabs(static_cast<double>(frame_rate)/frame_rate_base - fps) > 0.001)
    {
        frame_rate_base *= 10;
        frame_rate = static_cast<int>(fps*frame_rate_base + 0.5);
    }
    c->time_base.den = frame_rate;
    c->time_base.num = frame_rate_base;
    #if LIBAVFORMAT_BUILD > 4752
        // adjust time base for supported framerates
        if (codec && codec->supported_framerates)
        {
            AVRational req = {frame_rate, frame_rate_base};
            const AVRational* best = NULL;
            AVRational best_error = {INT_MAX, 1};
            for (const AVRational* p = codec->supported_framerates; p->den!=0; ++p)
            {
                AVRational error = av_sub_q(req, *p);
                if (error.num < 0)
                    error.num *= -1;
                if (av_cmp_q(error, best_error) < 0)
                {
                    best_error= error;
                    best= p;
                }
            }
            // `best` stays NULL when the list has no usable entry; keep the
            // requested time base in that case instead of dereferencing it.
            if (best)
            {
                c->time_base.den= best->num;
                c->time_base.num= best->den;
            }
        }
    #endif
    c->gop_size = 12; // emit one intra frame every twelve frames at most
    c->pix_fmt = pixel_format;
    if (c->codec_id == CODEC_ID_MPEG2VIDEO)
        c->max_b_frames = 2;
    if (c->codec_id == CODEC_ID_MPEG1VIDEO || c->codec_id == CODEC_ID_MSMPEG4V3)
    {
        // needed to avoid using macroblocks in which some coeffs overflow
        // this doesn't happen with normal video, it just happens here as the
        // motion of the chroma plane doesn't match the luma plane
        // avoid FFMPEG warning 'clipping 1 dct coefficients...'
        c->mb_decision = 2;
    }
    #if LIBAVCODEC_VERSION_INT > 0x000409
        // some formats want stream headers to be separate
        if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        {
            c->flags |= CODEC_FLAG_GLOBAL_HEADER;
        }
    #endif
    return st;
 }
 // Opens fileName as an output container with a single H.264 video stream and
 // writes the container header. The container format is guessed from the file
 // name. Returns false when a step fails; the caller is expected to call
 // close() afterwards to release whatever was allocated.
 bool OutputMediaStream_FFMPEG::open(const char* fileName, int width, int height, double fps)
 {
    fmt_ = 0;
    oc_ = 0;
    video_st_ = 0;
    // tell FFMPEG to register codecs
    av_register_all();
    av_log_set_level(AV_LOG_ERROR);
    // auto detect the output format from the file name
    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
        fmt_ = av_guess_format(NULL, fileName, NULL);
    #else
        fmt_ = guess_format(NULL, fileName, NULL);
    #endif
    if (!fmt_)
        return false;
    CodecID codec_id = CODEC_ID_H264;
    // alloc memory for context
    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
        oc_ = avformat_alloc_context();
    #else
        oc_ = av_alloc_format_context();
    #endif
    if (!oc_)
        return false;
    // set some options
    oc_->oformat = fmt_;
    snprintf(oc_->filename, sizeof(oc_->filename), "%s", fileName);
    oc_->max_delay = (int)(0.7 * AV_TIME_BASE); // This reduces buffer underrun warnings with MPEG
    PixelFormat codec_pix_fmt = PIX_FMT_YUV420P;
    int bitrate_scale = 64;
    // Nominal bitrate is width * height * bitrate_scale. Compute it in 64 bits
    // and clamp it, so very large frames cannot overflow int.
    const long long wide_bitrate = static_cast<long long>(width) * height * bitrate_scale;
    const int bitrate = static_cast<int>(std::min(wide_bitrate, static_cast<long long>(std::numeric_limits<int>::max())));
    // TODO -- safe to ignore output audio stream?
    video_st_ = addVideoStream(oc_, codec_id, width, height, bitrate, fps, codec_pix_fmt);
    if (!video_st_)
        return false;
    // set the output parameters (must be done even if no parameters)
    #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0)
        if (av_set_parameters(oc_, NULL) < 0)
            return false;
    #endif
    // finish the codec context of the video stream
    #if LIBAVFORMAT_BUILD > 4628
        AVCodecContext* c = (video_st_->codec);
    #else
        AVCodecContext* c = &(video_st_->codec);
    #endif
    c->codec_tag = MKTAG('H', '2', '6', '4');
    c->bit_rate_tolerance = c->bit_rate;
    // open the output file, if needed
    if (!(fmt_->flags & AVFMT_NOFILE))
    {
        #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0)
            int err = url_fopen(&oc_->pb, fileName, URL_WRONLY);
        #else
            int err = avio_open(&oc_->pb, fileName, AVIO_FLAG_WRITE);
        #endif
        // Failure is reported as a negative error code; any value >= 0 is success.
        if (err < 0)
            return false;
    }
    // write the stream header, if any
    #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0)
        av_write_header(oc_);
    #else
        avformat_write_header(oc_, NULL);
    #endif
    return true;
 }
 void OutputMediaStream_FFMPEG::write(unsigned char* data, int size)
 {
    // if zero size, it means the image was buffered
    if (size > 0) 
    {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.stream_index = video_st_->index;
        pkt.data = data;
        pkt.size = size;
        // write the compressed frame in the media file
        av_write_frame(oc_, &pkt);
    }
 }
 // Allocates an output stream and opens it for fileName.
 // Returns the stream handle, or 0 when allocation or opening fails.
 struct OutputMediaStream_FFMPEG* create_OutputMediaStream_FFMPEG(const char* fileName, int width, int height, double fps)
 {
    OutputMediaStream_FFMPEG* stream = static_cast<OutputMediaStream_FFMPEG*>(malloc(sizeof(OutputMediaStream_FFMPEG)));
    // malloc may fail; do not touch the stream in that case
    if (!stream)
        return 0;
    if (stream->open(fileName, width, height, fps))
        return stream;
    // open() failed part-way: release what it allocated before freeing the handle
    stream->close();
    free(stream);
    return 0;
 }
 // Closes the stream and frees the handle. A null handle is ignored.
 void release_OutputMediaStream_FFMPEG(struct OutputMediaStream_FFMPEG* stream)
 {
    if (!stream)
        return;
    stream->close();
    free(stream);
 }
 // C entry point that forwards one data chunk to the stream's write().
 void write_OutputMediaStream_FFMPEG(struct OutputMediaStream_FFMPEG* stream, unsigned char* data, int size)
 {
    OutputMediaStream_FFMPEG& target = *stream;
    target.write(data, size);
 }
--- a/modules/highgui/src/cap_ffmpeg_impl_v2.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl_v2.hpp
@ -43,6 +43,7 @@
 #include "cap_ffmpeg_api.hpp"
 #include <assert.h>
 #include <algorithm>
 #include <limits>
 #if defined _MSC_VER && _MSC_VER >= 1200
 #pragma warning( disable: 4244 4510 4512 4610 )
@ -1611,3 +1612,295 @@ void CvVideoWriter_FFMPEG::close()
        return writer->writeFrame(data, step, width, height, cn, origin);
    }
 /*
 * For CUDA encoder
 */
 // Output stream used by the CUDA encoder path: opens a container file and
 // writes the data chunks handed to write() into it.
 struct OutputMediaStream_FFMPEG
 {
    // State set up by open(); all three are reset to 0 at the start of open().
    AVOutputFormat* fmt_;
    AVFormatContext* oc_;
    AVStream* video_st_;
    // add a video output stream to the container
    static AVStream* addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format);
    // Opens fileName for writing; returns false on failure.
    bool open(const char* fileName, int width, int height, double fps);
    // Writes `size` bytes starting at `data` as one packet.
    void write(unsigned char* data, int size);
    // Finishes the file and frees what open() allocated.
    void close();
 };
 void OutputMediaStream_FFMPEG::close()
 {
    // no more frame to compress. The codec has a latency of a few
    // frames if using B frames, so we get the last frames by
    // passing the same picture again
    // TODO -- do we need to account for latency here?
    if (oc_)
    {
        // write the trailer, if any
        av_write_trailer(oc_);
        // free the streams
        for (unsigned int i = 0; i < oc_->nb_streams; ++i)
        {
            av_freep(&oc_->streams[i]->codec);
            av_freep(&oc_->streams[i]);
        }
        if (!(fmt_->flags & AVFMT_NOFILE) && oc_->pb)
        {
            // close the output file
            #if LIBAVCODEC_VERSION_INT < ((52<<16)+(123<<8)+0)
                #if LIBAVCODEC_VERSION_INT >= ((51<<16)+(49<<8)+0)
                    url_fclose(oc_->pb);
                #else
                    url_fclose(&oc_->pb);
                #endif
            #else
                avio_close(oc_->pb);
            #endif
        }
        // free the stream
        av_free(oc_);
    }
 }
 // Adds one video stream to `oc` and fills in its codec context (codec id,
 // bitrate, frame size, time base, GOP size, pixel format).
 // Returns the new stream, or 0 when the stream cannot be created.
 AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format)
 {
    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 10, 0)
        AVStream* st = avformat_new_stream(oc, 0);
    #else
        AVStream* st = av_new_stream(oc, 0);
    #endif
    if (!st)
        return 0;
    #if LIBAVFORMAT_BUILD > 4628
        AVCodecContext* c = st->codec;
    #else
        AVCodecContext* c = &(st->codec);
    #endif
    c->codec_id = codec_id;
    c->codec_type = AVMEDIA_TYPE_VIDEO;
    // put sample parameters
    c->bit_rate = bitrate;
    // took advice from
    // http://ffmpeg-users.933282.n4.nabble.com/warning-clipping-1-dct-coefficients-to-127-127-td934297.html
    c->qmin = 3;
    // resolution must be a multiple of two
    c->width = w;
    c->height = h;
    AVCodec* codec = avcodec_find_encoder(c->codec_id);
    // time base: this is the fundamental unit of time (in seconds) in terms
    // of which frame timestamps are represented. for fixed-fps content,
    // timebase should be 1/framerate and timestamp increments should be
    // identically 1
    //
    // Approximate fps by frame_rate / frame_rate_base, adding decimal digits
    // until the error is at most 0.001. The absolute value must cover the whole
    // difference, otherwise rates that round down (e.g. 24.4) are never refined.
    // At base 1000 the rounding error is already below the threshold, so the
    // base is capped there; the cap also stops the loop for non-finite fps.
    const int max_frame_rate_base = 1000;
    int frame_rate = static_cast<int>(fps+0.5);
    int frame_rate_base = 1;
    while (frame_rate_base < max_frame_rate_base &&
           fabs(static_cast<double>(frame_rate)/frame_rate_base - fps) > 0.001)
    {
        frame_rate_base *= 10;
        frame_rate = static_cast<int>(fps*frame_rate_base + 0.5);
    }
    c->time_base.den = frame_rate;
    c->time_base.num = frame_rate_base;
    #if LIBAVFORMAT_BUILD > 4752
        // adjust time base for supported framerates
        if (codec && codec->supported_framerates)
        {
            AVRational req = {frame_rate, frame_rate_base};
            const AVRational* best = NULL;
            AVRational best_error = {INT_MAX, 1};
            for (const AVRational* p = codec->supported_framerates; p->den!=0; ++p)
            {
                AVRational error = av_sub_q(req, *p);
                if (error.num < 0)
                    error.num *= -1;
                if (av_cmp_q(error, best_error) < 0)
                {
                    best_error= error;
                    best= p;
                }
            }
            // `best` stays NULL when the list has no usable entry; keep the
            // requested time base in that case instead of dereferencing it.
            if (best)
            {
                c->time_base.den= best->num;
                c->time_base.num= best->den;
            }
        }
    #endif
    c->gop_size = 12; // emit one intra frame every twelve frames at most
    c->pix_fmt = pixel_format;
    if (c->codec_id == CODEC_ID_MPEG2VIDEO)
        c->max_b_frames = 2;
    if (c->codec_id == CODEC_ID_MPEG1VIDEO || c->codec_id == CODEC_ID_MSMPEG4V3)
    {
        // needed to avoid using macroblocks in which some coeffs overflow
        // this doesn't happen with normal video, it just happens here as the
        // motion of the chroma plane doesn't match the luma plane
        // avoid FFMPEG warning 'clipping 1 dct coefficients...'
        c->mb_decision = 2;
    }
    #if LIBAVCODEC_VERSION_INT > 0x000409
        // some formats want stream headers to be separate
        if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        {
            c->flags |= CODEC_FLAG_GLOBAL_HEADER;
        }
    #endif
    return st;
 }
 // Opens fileName as an output container with a single H.264 video stream and
 // writes the container header. The container format is guessed from the file
 // name. Returns false when a step fails; the caller is expected to call
 // close() afterwards to release whatever was allocated.
 bool OutputMediaStream_FFMPEG::open(const char* fileName, int width, int height, double fps)
 {
    fmt_ = 0;
    oc_ = 0;
    video_st_ = 0;
    // tell FFMPEG to register codecs
    av_register_all();
    av_log_set_level(AV_LOG_ERROR);
    // auto detect the output format from the file name
    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
        fmt_ = av_guess_format(NULL, fileName, NULL);
    #else
        fmt_ = guess_format(NULL, fileName, NULL);
    #endif
    if (!fmt_)
        return false;
    CodecID codec_id = CODEC_ID_H264;
    // alloc memory for context
    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
        oc_ = avformat_alloc_context();
    #else
        oc_ = av_alloc_format_context();
    #endif
    if (!oc_)
        return false;
    // set some options
    oc_->oformat = fmt_;
    snprintf(oc_->filename, sizeof(oc_->filename), "%s", fileName);
    oc_->max_delay = (int)(0.7 * AV_TIME_BASE); // This reduces buffer underrun warnings with MPEG
    PixelFormat codec_pix_fmt = PIX_FMT_YUV420P;
    int bitrate_scale = 64;
    // Nominal bitrate is width * height * bitrate_scale. Compute it in 64 bits
    // and clamp it, so very large frames cannot overflow int.
    const long long wide_bitrate = static_cast<long long>(width) * height * bitrate_scale;
    const int bitrate = static_cast<int>(std::min(wide_bitrate, static_cast<long long>(std::numeric_limits<int>::max())));
    // TODO -- safe to ignore output audio stream?
    video_st_ = addVideoStream(oc_, codec_id, width, height, bitrate, fps, codec_pix_fmt);
    if (!video_st_)
        return false;
    // set the output parameters (must be done even if no parameters)
    #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0)
        if (av_set_parameters(oc_, NULL) < 0)
            return false;
    #endif
    // finish the codec context of the video stream
    #if LIBAVFORMAT_BUILD > 4628
        AVCodecContext* c = (video_st_->codec);
    #else
        AVCodecContext* c = &(video_st_->codec);
    #endif
    c->codec_tag = MKTAG('H', '2', '6', '4');
    c->bit_rate_tolerance = c->bit_rate;
    // open the output file, if needed
    if (!(fmt_->flags & AVFMT_NOFILE))
    {
        #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0)
            int err = url_fopen(&oc_->pb, fileName, URL_WRONLY);
        #else
            int err = avio_open(&oc_->pb, fileName, AVIO_FLAG_WRITE);
        #endif
        // Failure is reported as a negative error code; any value >= 0 is success.
        if (err < 0)
            return false;
    }
    // write the stream header, if any
    #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(53, 2, 0)
        av_write_header(oc_);
    #else
        avformat_write_header(oc_, NULL);
    #endif
    return true;
 }
 void OutputMediaStream_FFMPEG::write(unsigned char* data, int size)
 {
    // if zero size, it means the image was buffered
    if (size > 0) 
    {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.stream_index = video_st_->index;
        pkt.data = data;
        pkt.size = size;
        // write the compressed frame in the media file
        av_write_frame(oc_, &pkt);
    }
 }
 // Allocates an output stream and opens it for fileName.
 // Returns the stream handle, or 0 when allocation or opening fails.
 struct OutputMediaStream_FFMPEG* create_OutputMediaStream_FFMPEG(const char* fileName, int width, int height, double fps)
 {
    OutputMediaStream_FFMPEG* stream = static_cast<OutputMediaStream_FFMPEG*>(malloc(sizeof(OutputMediaStream_FFMPEG)));
    // malloc may fail; do not touch the stream in that case
    if (!stream)
        return 0;
    if (stream->open(fileName, width, height, fps))
        return stream;
    // open() failed part-way: release what it allocated before freeing the handle
    stream->close();
    free(stream);
    return 0;
 }
 // Closes the stream and frees the handle. A null handle is ignored.
 void release_OutputMediaStream_FFMPEG(struct OutputMediaStream_FFMPEG* stream)
 {
    if (!stream)
        return;
    stream->close();
    free(stream);
 }
 // C entry point that forwards one data chunk to the stream's write().
 void write_OutputMediaStream_FFMPEG(struct OutputMediaStream_FFMPEG* stream, unsigned char* data, int size)
 {
    OutputMediaStream_FFMPEG& target = *stream;
    target.write(data, size);
 }
--- a/samples/gpu/video_writer.cpp
+++ b/samples/gpu/video_writer.cpp
@ -0,0 +1,96 @@
 #include <iostream>
 #include <vector>
 #include <numeric>
 #include "opencv2/core/core.hpp"
 #include "opencv2/gpu/gpu.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/contrib/contrib.hpp"
 // Re-encodes the input video twice, once with cv::VideoWriter (XVID, CPU) and
 // once with cv::gpu::VideoWriter_GPU (H264, GPU), and prints the average time
 // spent per written frame for each writer.
 // Usage: video_writer <input video file>
 int main(int argc, const char* argv[])
 {
    if (argc != 2)
    {
        std::cerr << "Usage : video_writer <input video file>" << std::endl;
        return -1;
    }
    const double FPS = 25.0;
    cv::VideoCapture reader(argv[1]);
    if (!reader.isOpened())
    {
        std::cerr << "Can't open input video file" << std::endl;
        return -1;
    }
    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
    cv::VideoWriter writer;
    cv::gpu::VideoWriter_GPU d_writer;
    cv::Mat frame;
    cv::gpu::GpuMat d_frame;
    // Per-frame write times in milliseconds.
    std::vector<double> cpu_times;
    std::vector<double> gpu_times;
    cv::TickMeter tm;
    for (int i = 1;; ++i)
    {
        std::cout << "Read " << i << " frame" << std::endl;
        reader >> frame;
        if (frame.empty())
        {
            std::cout << "Stop" << std::endl;
            break;
        }
        // Both writers are opened lazily because they need the frame size.
        if (!writer.isOpened())
        {
            std::cout << "Frame Size : " << frame.cols << "x" << frame.rows << std::endl;
            std::cout << "Open CPU Writer" << std::endl;
            if (!writer.open("output_cpu.avi", CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size()))
                return -1;
        }
        if (!d_writer.isOpened())
        {
            std::cout << "Open GPU Writer" << std::endl;
            d_writer.open("output_gpu.avi", frame.size(), FPS);
        }
        // The upload happens outside the timed section.
        d_frame.upload(frame);
        std::cout << "Write " << i << " frame" << std::endl;
        tm.reset(); tm.start();
        writer.write(frame);
        tm.stop();
        cpu_times.push_back(tm.getTimeMilli());
        tm.reset(); tm.start();
        d_writer.write(d_frame);
        tm.stop();
        gpu_times.push_back(tm.getTimeMilli());
    }
    // Without any frame the averages below would divide by zero.
    if (cpu_times.empty() || gpu_times.empty())
    {
        std::cerr << "No frames were read from the input video" << std::endl;
        return -1;
    }
    std::cout << std::endl << "Results:" << std::endl;
    // The mean does not depend on element order, so the samples are not sorted.
    double cpu_avg = std::accumulate(cpu_times.begin(), cpu_times.end(), 0.0) / cpu_times.size();
    double gpu_avg = std::accumulate(gpu_times.begin(), gpu_times.end(), 0.0) / gpu_times.size();
    std::cout << "CPU [XVID] : Avg : " << cpu_avg << " ms FPS : " << 1000.0 / cpu_avg << std::endl;
    std::cout << "GPU [H264] : Avg : " << gpu_avg << " ms FPS : " << 1000.0 / gpu_avg << std::endl;
    return 0;
 }