hwcontext: add a CUDA implementation
This commit is contained in:
		| @@ -17,6 +17,8 @@ API changes, most recent first: | ||||
|   xxxxxxx buffer.h - Add av_buffer_pool_init2(). | ||||
|   xxxxxxx hwcontext.h - Add a new installed header hwcontext.h with a new API | ||||
|                         for handling hwaccel frames. | ||||
|   xxxxxxx hwcontext_cuda.h - Add a new installed header hwcontext_cuda.h with | ||||
|                              CUDA-specific hwcontext definitions. | ||||
|   xxxxxxx hwcontext_vdpau.h - Add a new installed header hwcontext_vdpau.h with | ||||
|                               VDPAU-specific hwcontext definitions. | ||||
|   xxxxxxx pixfmt.h - Add AV_PIX_FMT_CUDA. | ||||
|   | ||||
| @@ -24,6 +24,7 @@ HEADERS = adler32.h                                                     \ | ||||
|           frame.h                                                       \ | ||||
|           hmac.h                                                        \ | ||||
|           hwcontext.h                                                   \ | ||||
|           hwcontext_cuda.h                                              \ | ||||
|           hwcontext_vdpau.h                                             \ | ||||
|           imgutils.h                                                    \ | ||||
|           intfloat.h                                                    \ | ||||
| @@ -106,6 +107,7 @@ OBJS = adler32.o                                                        \ | ||||
|        xtea.o                                                           \ | ||||
|  | ||||
| OBJS-$(CONFIG_LZO)                      += lzo.o | ||||
| OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o | ||||
| OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o | ||||
|  | ||||
| OBJS += $(COMPAT_OBJS:%=../compat/%) | ||||
|   | ||||
| @@ -29,6 +29,9 @@ | ||||
| #include "pixfmt.h" | ||||
|  | ||||
| static const HWContextType *hw_table[] = { | ||||
| #if CONFIG_CUDA | ||||
|     &ff_hwcontext_type_cuda, | ||||
| #endif | ||||
| #if CONFIG_VDPAU | ||||
|     &ff_hwcontext_type_vdpau, | ||||
| #endif | ||||
|   | ||||
| @@ -26,6 +26,7 @@ | ||||
|  | ||||
| enum AVHWDeviceType { | ||||
|     AV_HWDEVICE_TYPE_VDPAU, | ||||
|     AV_HWDEVICE_TYPE_CUDA, | ||||
| }; | ||||
|  | ||||
| typedef struct AVHWDeviceInternal AVHWDeviceInternal; | ||||
|   | ||||
							
								
								
									
										270
									
								
								libavutil/hwcontext_cuda.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										270
									
								
								libavutil/hwcontext_cuda.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,270 @@ | ||||
| /* | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "buffer.h" | ||||
| #include "common.h" | ||||
| #include "hwcontext.h" | ||||
| #include "hwcontext_internal.h" | ||||
| #include "hwcontext_cuda.h" | ||||
| #include "mem.h" | ||||
| #include "pixdesc.h" | ||||
| #include "pixfmt.h" | ||||
|  | ||||
/**
 * Private per-frames-context state, allocated by the generic hwcontext code
 * (see frames_priv_size in ff_hwcontext_type_cuda).
 */
typedef struct CUDAFramesContext {
    /* log2 horizontal chroma subsampling of sw_format, set in cuda_frames_init() */
    int shift_width;
    /* log2 vertical chroma subsampling of sw_format; used to derive the
     * height of the non-luma planes when copying frame data */
    int shift_height;
} CUDAFramesContext;
|  | ||||
| static const enum AVPixelFormat supported_formats[] = { | ||||
|     AV_PIX_FMT_NV12, | ||||
|     AV_PIX_FMT_YUV420P, | ||||
|     AV_PIX_FMT_YUV444P, | ||||
| }; | ||||
|  | ||||
| static void cuda_buffer_free(void *opaque, uint8_t *data) | ||||
| { | ||||
|     AVHWFramesContext *ctx = opaque; | ||||
|     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; | ||||
|  | ||||
|     CUcontext dummy; | ||||
|  | ||||
|     cuCtxPushCurrent(hwctx->cuda_ctx); | ||||
|  | ||||
|     cuMemFree((CUdeviceptr)data); | ||||
|  | ||||
|     cuCtxPopCurrent(&dummy); | ||||
| } | ||||
|  | ||||
| static AVBufferRef *cuda_pool_alloc(void *opaque, int size) | ||||
| { | ||||
|     AVHWFramesContext     *ctx = opaque; | ||||
|     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; | ||||
|  | ||||
|     AVBufferRef *ret = NULL; | ||||
|     CUcontext dummy = NULL; | ||||
|     CUdeviceptr data; | ||||
|     CUresult err; | ||||
|  | ||||
|     err = cuCtxPushCurrent(hwctx->cuda_ctx); | ||||
|     if (err != CUDA_SUCCESS) { | ||||
|         av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     err = cuMemAlloc(&data, size); | ||||
|     if (err != CUDA_SUCCESS) | ||||
|         goto fail; | ||||
|  | ||||
|     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); | ||||
|     if (!ret) { | ||||
|         cuMemFree(data); | ||||
|         goto fail; | ||||
|     } | ||||
|  | ||||
| fail: | ||||
|     cuCtxPopCurrent(&dummy); | ||||
|     return ret; | ||||
| } | ||||
|  | ||||
| static int cuda_frames_init(AVHWFramesContext *ctx) | ||||
| { | ||||
|     CUDAFramesContext *priv = ctx->internal->priv; | ||||
|     int i; | ||||
|  | ||||
|     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { | ||||
|         if (ctx->sw_format == supported_formats[i]) | ||||
|             break; | ||||
|     } | ||||
|     if (i == FF_ARRAY_ELEMS(supported_formats)) { | ||||
|         av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n", | ||||
|                av_get_pix_fmt_name(ctx->sw_format)); | ||||
|         return AVERROR(ENOSYS); | ||||
|     } | ||||
|  | ||||
|     av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height); | ||||
|  | ||||
|     if (!ctx->pool) { | ||||
|         int size; | ||||
|  | ||||
|         switch (ctx->sw_format) { | ||||
|         case AV_PIX_FMT_NV12: | ||||
|         case AV_PIX_FMT_YUV420P: | ||||
|             size = ctx->width * ctx->height * 3 / 2; | ||||
|             break; | ||||
|         case AV_PIX_FMT_YUV444P: | ||||
|             size = ctx->width * ctx->height * 3; | ||||
|             break; | ||||
|         } | ||||
|  | ||||
|         ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL); | ||||
|         if (!ctx->internal->pool_internal) | ||||
|             return AVERROR(ENOMEM); | ||||
|     } | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) | ||||
| { | ||||
|     frame->buf[0] = av_buffer_pool_get(ctx->pool); | ||||
|     if (!frame->buf[0]) | ||||
|         return AVERROR(ENOMEM); | ||||
|  | ||||
|     switch (ctx->sw_format) { | ||||
|     case AV_PIX_FMT_NV12: | ||||
|         frame->data[0]     = frame->buf[0]->data; | ||||
|         frame->data[1]     = frame->data[0] + ctx->width * ctx->height; | ||||
|         frame->linesize[0] = ctx->width; | ||||
|         frame->linesize[1] = ctx->width; | ||||
|         break; | ||||
|     case AV_PIX_FMT_YUV420P: | ||||
|         frame->data[0]     = frame->buf[0]->data; | ||||
|         frame->data[2]     = frame->data[0] + ctx->width * ctx->height; | ||||
|         frame->data[1]     = frame->data[2] + ctx->width * ctx->height / 4; | ||||
|         frame->linesize[0] = ctx->width; | ||||
|         frame->linesize[1] = ctx->width / 2; | ||||
|         frame->linesize[2] = ctx->width / 2; | ||||
|         break; | ||||
|     case AV_PIX_FMT_YUV444P: | ||||
|         frame->data[0]     = frame->buf[0]->data; | ||||
|         frame->data[1]     = frame->data[0] + ctx->width * ctx->height; | ||||
|         frame->data[2]     = frame->data[1] + ctx->width * ctx->height; | ||||
|         frame->linesize[0] = ctx->width; | ||||
|         frame->linesize[1] = ctx->width; | ||||
|         frame->linesize[2] = ctx->width; | ||||
|         break; | ||||
|     default: | ||||
|         av_frame_unref(frame); | ||||
|         return AVERROR_BUG; | ||||
|     } | ||||
|  | ||||
|     frame->format = AV_PIX_FMT_CUDA; | ||||
|     frame->width  = ctx->width; | ||||
|     frame->height = ctx->height; | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| static int cuda_transfer_get_formats(AVHWFramesContext *ctx, | ||||
|                                      enum AVHWFrameTransferDirection dir, | ||||
|                                      enum AVPixelFormat **formats) | ||||
| { | ||||
|     enum AVPixelFormat *fmts; | ||||
|  | ||||
|     fmts = av_malloc_array(2, sizeof(*fmts)); | ||||
|     if (!fmts) | ||||
|         return AVERROR(ENOMEM); | ||||
|  | ||||
|     fmts[0] = ctx->sw_format; | ||||
|     fmts[1] = AV_PIX_FMT_NONE; | ||||
|  | ||||
|     *formats = fmts; | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, | ||||
|                                    const AVFrame *src) | ||||
| { | ||||
|     CUDAFramesContext           *priv = ctx->internal->priv; | ||||
|     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; | ||||
|  | ||||
|     CUcontext dummy; | ||||
|     CUresult err; | ||||
|     int i; | ||||
|  | ||||
|     err = cuCtxPushCurrent(device_hwctx->cuda_ctx); | ||||
|     if (err != CUDA_SUCCESS) | ||||
|         return AVERROR_UNKNOWN; | ||||
|  | ||||
|     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { | ||||
|         CUDA_MEMCPY2D cpy = { | ||||
|             .srcMemoryType = CU_MEMORYTYPE_DEVICE, | ||||
|             .dstMemoryType = CU_MEMORYTYPE_HOST, | ||||
|             .srcDevice     = (CUdeviceptr)src->data[i], | ||||
|             .dstHost       = dst->data[i], | ||||
|             .srcPitch      = src->linesize[i], | ||||
|             .dstPitch      = dst->linesize[i], | ||||
|             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]), | ||||
|             .Height        = src->height >> (i ? priv->shift_height : 0), | ||||
|         }; | ||||
|  | ||||
|         err = cuMemcpy2D(&cpy); | ||||
|         if (err != CUDA_SUCCESS) { | ||||
|             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); | ||||
|             return AVERROR_UNKNOWN; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     cuCtxPopCurrent(&dummy); | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, | ||||
|                                  const AVFrame *src) | ||||
| { | ||||
|     CUDAFramesContext           *priv = ctx->internal->priv; | ||||
|     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; | ||||
|  | ||||
|     CUcontext dummy; | ||||
|     CUresult err; | ||||
|     int i; | ||||
|  | ||||
|     err = cuCtxPushCurrent(device_hwctx->cuda_ctx); | ||||
|     if (err != CUDA_SUCCESS) | ||||
|         return AVERROR_UNKNOWN; | ||||
|  | ||||
|     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { | ||||
|         CUDA_MEMCPY2D cpy = { | ||||
|             .srcMemoryType = CU_MEMORYTYPE_HOST, | ||||
|             .dstMemoryType = CU_MEMORYTYPE_DEVICE, | ||||
|             .srcHost       = src->data[i], | ||||
|             .dstDevice     = (CUdeviceptr)dst->data[i], | ||||
|             .srcPitch      = src->linesize[i], | ||||
|             .dstPitch      = dst->linesize[i], | ||||
|             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]), | ||||
|             .Height        = src->height >> (i ? priv->shift_height : 0), | ||||
|         }; | ||||
|  | ||||
|         err = cuMemcpy2D(&cpy); | ||||
|         if (err != CUDA_SUCCESS) { | ||||
|             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); | ||||
|             return AVERROR_UNKNOWN; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     cuCtxPopCurrent(&dummy); | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| const HWContextType ff_hwcontext_type_cuda = { | ||||
|     .type                 = AV_HWDEVICE_TYPE_CUDA, | ||||
|     .name                 = "CUDA", | ||||
|  | ||||
|     .device_hwctx_size    = sizeof(AVCUDADeviceContext), | ||||
|     .frames_priv_size     = sizeof(CUDAFramesContext), | ||||
|  | ||||
|     .frames_init          = cuda_frames_init, | ||||
|     .frames_get_buffer    = cuda_get_buffer, | ||||
|     .transfer_get_formats = cuda_transfer_get_formats, | ||||
|     .transfer_data_to     = cuda_transfer_data_to, | ||||
|     .transfer_data_from   = cuda_transfer_data_from, | ||||
|  | ||||
|     .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE }, | ||||
| }; | ||||
							
								
								
									
										46
									
								
								libavutil/hwcontext_cuda.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								libavutil/hwcontext_cuda.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| /* | ||||
|  * This file is part of Libav. | ||||
|  * | ||||
|  * Libav is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * Libav is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with Libav; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
|  | ||||
| #ifndef AVUTIL_HWCONTEXT_CUDA_H | ||||
| #define AVUTIL_HWCONTEXT_CUDA_H | ||||
|  | ||||
| #include <cuda.h> | ||||
|  | ||||
| #include "pixfmt.h" | ||||
|  | ||||
| /** | ||||
|  * @file | ||||
|  * An API-specific header for AV_HWDEVICE_TYPE_CUDA. | ||||
|  * | ||||
|  * This API supports dynamic frame pools. AVHWFramesContext.pool must return | ||||
|  * AVBufferRefs whose data pointer is a CUdeviceptr. | ||||
|  */ | ||||
|  | ||||
| /** | ||||
|  * This struct is allocated as AVHWDeviceContext.hwctx | ||||
|  */ | ||||
| typedef struct AVCUDADeviceContext { | ||||
|     CUcontext cuda_ctx; | ||||
| } AVCUDADeviceContext; | ||||
|  | ||||
| /** | ||||
|  * AVHWFramesContext.hwctx is currently not used | ||||
|  */ | ||||
|  | ||||
| #endif /* AVUTIL_HWCONTEXT_CUDA_H */ | ||||
| @@ -86,6 +86,7 @@ struct AVHWFramesInternal { | ||||
|     AVBufferPool *pool_internal; | ||||
| }; | ||||
|  | ||||
| extern const HWContextType ff_hwcontext_type_cuda; | ||||
| extern const HWContextType ff_hwcontext_type_vdpau; | ||||
|  | ||||
| #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Anton Khirnov
					Anton Khirnov