switched to new device layer in gpucodec module
This commit is contained in:
		| @@ -4,9 +4,9 @@ endif() | |||||||
|  |  | ||||||
| set(the_description "CUDA-accelerated Video Encoding/Decoding") | set(the_description "CUDA-accelerated Video Encoding/Decoding") | ||||||
|  |  | ||||||
| ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations) | ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef) | ||||||
|  |  | ||||||
| ocv_add_module(cudacodec opencv_highgui) | ocv_add_module(cudacodec opencv_highgui OPTIONAL opencv_cudev) | ||||||
|  |  | ||||||
| ocv_module_include_directories() | ocv_module_include_directories() | ||||||
| ocv_glob_module_sources() | ocv_glob_module_sources() | ||||||
|   | |||||||
| @@ -47,13 +47,26 @@ | |||||||
|  * source and converts to output in ARGB format |  * source and converts to output in ARGB format | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| #include "opencv2/core/cuda/common.hpp" | #include "opencv2/opencv_modules.hpp" | ||||||
|  |  | ||||||
| namespace cv { namespace cuda { namespace device | #ifndef HAVE_OPENCV_CUDEV | ||||||
|  |  | ||||||
|  | #error "opencv_cudev is required" | ||||||
|  |  | ||||||
|  | #else | ||||||
|  |  | ||||||
|  | #include "opencv2/cudev/common.hpp" | ||||||
|  |  | ||||||
|  | using namespace cv; | ||||||
|  | using namespace cv::cudev; | ||||||
|  |  | ||||||
|  | void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height); | ||||||
|  |  | ||||||
|  | namespace | ||||||
| { | { | ||||||
|     __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f}; |     __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f}; | ||||||
|  |  | ||||||
|     __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue) |     __device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue) | ||||||
|     { |     { | ||||||
|         float luma, chromaCb, chromaCr; |         float luma, chromaCb, chromaCr; | ||||||
|  |  | ||||||
| @@ -76,7 +89,7 @@ namespace cv { namespace cuda { namespace device | |||||||
|                 (chromaCr * constHueColorSpaceMat[8]); |                 (chromaCr * constHueColorSpaceMat[8]); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     __device__ uint RGBA_pack_10bit(float red, float green, float blue, uint alpha) |     __device__ static uint RGBA_pack_10bit(float red, float green, float blue, uint alpha) | ||||||
|     { |     { | ||||||
|         uint ARGBpixel = 0; |         uint ARGBpixel = 0; | ||||||
|  |  | ||||||
| @@ -99,9 +112,9 @@ namespace cv { namespace cuda { namespace device | |||||||
|     #define COLOR_COMPONENT_BIT_SIZE 10 |     #define COLOR_COMPONENT_BIT_SIZE 10 | ||||||
|     #define COLOR_COMPONENT_MASK     0x3FF |     #define COLOR_COMPONENT_MASK     0x3FF | ||||||
|  |  | ||||||
|     __global__ void NV12_to_RGB(uchar* srcImage, size_t nSourcePitch, |     __global__ void NV12_to_RGB(const uchar* srcImage, size_t nSourcePitch, | ||||||
|                                 uint* dstImage, size_t nDestPitch, |                                   uint* dstImage, size_t nDestPitch, | ||||||
|                                 uint width, uint height) |                                   uint width, uint height) | ||||||
|     { |     { | ||||||
|         // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread |         // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread | ||||||
|         const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); |         const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); | ||||||
| @@ -171,18 +184,24 @@ namespace cv { namespace cuda { namespace device | |||||||
|         dstImage[y * dstImagePitch + x     ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24)); |         dstImage[y * dstImagePitch + x     ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24)); | ||||||
|         dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24)); |         dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24)); | ||||||
|     } |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|     void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream) | void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height) | ||||||
|     { | { | ||||||
|         dim3 block(32, 8); |     // Final Stage: NV12toARGB color space conversion | ||||||
|         dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y)); |  | ||||||
|  |  | ||||||
|         NV12_to_RGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step, |     _outFrame.create(height, width, CV_8UC4); | ||||||
|             interopFrame.cols, interopFrame.rows); |     GpuMat outFrame = _outFrame.getGpuMat(); | ||||||
|  |  | ||||||
|         cudaSafeCall( cudaGetLastError() ); |     dim3 block(32, 8); | ||||||
|  |     dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y)); | ||||||
|  |  | ||||||
|         if (stream == 0) |     NV12_to_RGB<<<grid, block>>>(decodedFrame.ptr<uchar>(), decodedFrame.step, | ||||||
|             cudaSafeCall( cudaDeviceSynchronize() ); |                                  outFrame.ptr<uint>(), outFrame.step, | ||||||
|     } |                                  width, height); | ||||||
| }}} |  | ||||||
|  |     CV_CUDEV_SAFE_CALL( cudaGetLastError() ); | ||||||
|  |     CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif | ||||||
|   | |||||||
| @@ -40,10 +40,21 @@ | |||||||
| // | // | ||||||
| //M*/ | //M*/ | ||||||
|  |  | ||||||
| #include "opencv2/core/cuda/common.hpp" | #include "opencv2/opencv_modules.hpp" | ||||||
| #include "opencv2/core/cuda/vec_traits.hpp" |  | ||||||
|  |  | ||||||
| namespace cv { namespace cuda { namespace device | #ifndef HAVE_OPENCV_CUDEV | ||||||
|  |  | ||||||
|  | #error "opencv_cudev is required" | ||||||
|  |  | ||||||
|  | #else | ||||||
|  |  | ||||||
|  | #include "opencv2/cudev/ptr2d/glob.hpp" | ||||||
|  |  | ||||||
|  | using namespace cv::cudev; | ||||||
|  |  | ||||||
|  | void RGB_to_YV12(const GpuMat& src, GpuMat& dst); | ||||||
|  |  | ||||||
|  | namespace | ||||||
| { | { | ||||||
|     __device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y) |     __device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y) | ||||||
|     { |     { | ||||||
| @@ -57,7 +68,7 @@ namespace cv { namespace cuda { namespace device | |||||||
|         v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100); |         v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     __global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst) |     __global__ void Gray_to_YV12(const GlobPtrSz<uchar> src, GlobPtr<uchar> dst) | ||||||
|     { |     { | ||||||
|         const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; |         const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; | ||||||
|         const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; |         const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; | ||||||
| @@ -67,9 +78,9 @@ namespace cv { namespace cuda { namespace device | |||||||
|  |  | ||||||
|         // get pointers to the data |         // get pointers to the data | ||||||
|         const size_t planeSize = src.rows * dst.step; |         const size_t planeSize = src.rows * dst.step; | ||||||
|         PtrStepb y_plane(dst.data, dst.step); |         GlobPtr<uchar> y_plane = globPtr(dst.data, dst.step); | ||||||
|         PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2); |         GlobPtr<uchar> u_plane = globPtr(y_plane.data + planeSize, dst.step / 2); | ||||||
|         PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2); |         GlobPtr<uchar> v_plane = globPtr(u_plane.data + (planeSize / 4), dst.step / 2); | ||||||
|  |  | ||||||
|         uchar pix; |         uchar pix; | ||||||
|         uchar y_val, u_val, v_val; |         uchar y_val, u_val, v_val; | ||||||
| @@ -94,7 +105,7 @@ namespace cv { namespace cuda { namespace device | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     template <typename T> |     template <typename T> | ||||||
|     __global__ void RGB_to_YV12(const PtrStepSz<T> src, PtrStepb dst) |     __global__ void RGB_to_YV12(const GlobPtrSz<T> src, GlobPtr<uchar> dst) | ||||||
|     { |     { | ||||||
|         const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; |         const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2; | ||||||
|         const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; |         const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2; | ||||||
| @@ -104,9 +115,9 @@ namespace cv { namespace cuda { namespace device | |||||||
|  |  | ||||||
|         // get pointers to the data |         // get pointers to the data | ||||||
|         const size_t planeSize = src.rows * dst.step; |         const size_t planeSize = src.rows * dst.step; | ||||||
|         PtrStepb y_plane(dst.data, dst.step); |         GlobPtr<uchar> y_plane = globPtr(dst.data, dst.step); | ||||||
|         PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2); |         GlobPtr<uchar> u_plane = globPtr(y_plane.data + planeSize, dst.step / 2); | ||||||
|         PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2); |         GlobPtr<uchar> v_plane = globPtr(u_plane.data + (planeSize / 4), dst.step / 2); | ||||||
|  |  | ||||||
|         T pix; |         T pix; | ||||||
|         uchar y_val, u_val, v_val; |         uchar y_val, u_val, v_val; | ||||||
| @@ -129,42 +140,28 @@ namespace cv { namespace cuda { namespace device | |||||||
|         u_plane(y / 2, x / 2) = u_val; |         u_plane(y / 2, x / 2) = u_val; | ||||||
|         v_plane(y / 2, x / 2) = v_val; |         v_plane(y / 2, x / 2) = v_val; | ||||||
|     } |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|     void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream) | void RGB_to_YV12(const GpuMat& src, GpuMat& dst) | ||||||
|  | { | ||||||
|  |     const dim3 block(32, 8); | ||||||
|  |     const dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); | ||||||
|  |  | ||||||
|  |     switch (src.channels()) | ||||||
|     { |     { | ||||||
|         dim3 block(32, 8); |     case 1: | ||||||
|         dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); |         Gray_to_YV12<<<grid, block>>>(globPtr<uchar>(src), globPtr<uchar>(dst)); | ||||||
|  |         break; | ||||||
|         Gray_to_YV12<<<grid, block, 0, stream>>>(src, dst); |     case 3: | ||||||
|         cudaSafeCall( cudaGetLastError() ); |         RGB_to_YV12<<<grid, block>>>(globPtr<uchar3>(src), globPtr<uchar>(dst)); | ||||||
|  |         break; | ||||||
|         if (stream == 0) |     case 4: | ||||||
|             cudaSafeCall( cudaDeviceSynchronize() ); |         RGB_to_YV12<<<grid, block>>>(globPtr<uchar4>(src), globPtr<uchar>(dst)); | ||||||
|     } |         break; | ||||||
|     template <int cn> |  | ||||||
|     void RGB_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream) |  | ||||||
|     { |  | ||||||
|         typedef typename TypeVec<uchar, cn>::vec_type src_t; |  | ||||||
|  |  | ||||||
|         dim3 block(32, 8); |  | ||||||
|         dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2)); |  | ||||||
|  |  | ||||||
|         RGB_to_YV12<<<grid, block, 0, stream>>>(static_cast< PtrStepSz<src_t> >(src), dst); |  | ||||||
|         cudaSafeCall( cudaGetLastError() ); |  | ||||||
|  |  | ||||||
|         if (stream == 0) |  | ||||||
|             cudaSafeCall( cudaDeviceSynchronize() ); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream) |     CV_CUDEV_SAFE_CALL( cudaGetLastError() ); | ||||||
|     { |     CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||||||
|         typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream); | } | ||||||
|  |  | ||||||
|         static const func_t funcs[] = | #endif | ||||||
|         { |  | ||||||
|             0, Gray_to_YV12_caller, 0, RGB_to_YV12_caller<3>, RGB_to_YV12_caller<4> |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         funcs[cn](src, dst, stream); |  | ||||||
|     } |  | ||||||
| }}} |  | ||||||
|   | |||||||
| @@ -53,10 +53,7 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&) { | |||||||
|  |  | ||||||
| #else // HAVE_NVCUVID | #else // HAVE_NVCUVID | ||||||
|  |  | ||||||
| namespace cv { namespace cuda { namespace device | void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height); | ||||||
| { |  | ||||||
|     void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream = 0); |  | ||||||
| }}} |  | ||||||
|  |  | ||||||
| using namespace cv::cudacodec::detail; | using namespace cv::cudacodec::detail; | ||||||
|  |  | ||||||
| @@ -125,18 +122,6 @@ namespace | |||||||
|         CUvideoctxlock m_lock; |         CUvideoctxlock m_lock; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     void cudaPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height) |  | ||||||
|     { |  | ||||||
|         using namespace cv::cuda::device; |  | ||||||
|  |  | ||||||
|         // Final Stage: NV12toARGB color space conversion |  | ||||||
|  |  | ||||||
|         _outFrame.create(height, width, CV_8UC4); |  | ||||||
|         GpuMat outFrame = _outFrame.getGpuMat(); |  | ||||||
|  |  | ||||||
|         NV12_to_RGB(decodedFrame, outFrame); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     bool VideoReaderImpl::nextFrame(OutputArray frame) |     bool VideoReaderImpl::nextFrame(OutputArray frame) | ||||||
|     { |     { | ||||||
|         if (videoSource_->hasError() || videoParser_->hasError()) |         if (videoSource_->hasError() || videoParser_->hasError()) | ||||||
| @@ -195,7 +180,7 @@ namespace | |||||||
|  |  | ||||||
|             // perform post processing on the CUDA surface (performs colors space conversion and post processing) |             // perform post processing on the CUDA surface (performs colors space conversion and post processing) | ||||||
|             // comment this out if we inclue the line of code seen above |             // comment this out if we inclue the line of code seen above | ||||||
|             cudaPostProcessFrame(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight()); |             videoDecPostProcessFrame(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight()); | ||||||
|  |  | ||||||
|             // unmap video frame |             // unmap video frame | ||||||
|             // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding) |             // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding) | ||||||
|   | |||||||
| @@ -62,10 +62,7 @@ Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>&, S | |||||||
|  |  | ||||||
| #else // !defined HAVE_CUDA || !defined WIN32 | #else // !defined HAVE_CUDA || !defined WIN32 | ||||||
|  |  | ||||||
| namespace cv { namespace cuda { namespace device | void RGB_to_YV12(const GpuMat& src, GpuMat& dst); | ||||||
| { |  | ||||||
|     void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0); |  | ||||||
| }}} |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////// | ||||||
| // VideoWriterImpl | // VideoWriterImpl | ||||||
| @@ -642,7 +639,7 @@ namespace | |||||||
|  |  | ||||||
|         if (inputFormat_ == SF_BGR) |         if (inputFormat_ == SF_BGR) | ||||||
|         { |         { | ||||||
|             device::RGB_to_YV12(frame, frame.channels(), videoFrame_); |             RGB_to_YV12(frame, videoFrame_); | ||||||
|         } |         } | ||||||
|         else |         else | ||||||
|         { |         { | ||||||
|   | |||||||
| @@ -72,7 +72,7 @@ template <typename T> struct GlobPtrSz : GlobPtr<T> | |||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| __host__ GlobPtr<T> globPtr(T* data, size_t step) | __host__ __device__ GlobPtr<T> globPtr(T* data, size_t step) | ||||||
| { | { | ||||||
|     GlobPtr<T> p; |     GlobPtr<T> p; | ||||||
|     p.data = data; |     p.data = data; | ||||||
| @@ -81,7 +81,7 @@ __host__ GlobPtr<T> globPtr(T* data, size_t step) | |||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| __host__ GlobPtrSz<T> globPtr(T* data, size_t step, int rows, int cols) | __host__ __device__ GlobPtrSz<T> globPtr(T* data, size_t step, int rows, int cols) | ||||||
| { | { | ||||||
|     GlobPtrSz<T> p; |     GlobPtrSz<T> p; | ||||||
|     p.data = data; |     p.data = data; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Vladislav Vinogradov
					Vladislav Vinogradov