2014-11-30 00:04:37 +01:00
|
|
|
/*
|
|
|
|
* H.264 hardware encoding using nvidia nvenc
|
|
|
|
* Copyright (c) 2014 Timo Rothenpieler <timo@rothenpieler.org>
|
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
2014-12-25 14:55:31 +01:00
|
|
|
#if defined(_WIN32)
|
2014-11-30 00:04:37 +01:00
|
|
|
#include <windows.h>
|
|
|
|
#else
|
|
|
|
#include <dlfcn.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <nvEncodeAPI.h>
|
|
|
|
|
|
|
|
#include "libavutil/internal.h"
|
|
|
|
#include "libavutil/imgutils.h"
|
|
|
|
#include "libavutil/avassert.h"
|
|
|
|
#include "libavutil/opt.h"
|
|
|
|
#include "libavutil/mem.h"
|
|
|
|
#include "avcodec.h"
|
|
|
|
#include "internal.h"
|
|
|
|
#include "thread.h"
|
|
|
|
|
2014-12-25 14:55:31 +01:00
|
|
|
/*
 * Platform glue.
 *
 * CUDAAPI       - calling convention applied to the CUDA entry point typedefs.
 * LOAD_FUNC     - resolve symbol `s` from the already opened library `l`.
 * DL_CLOSE_FUNC - release the library handle `l`.
 */
#ifdef _WIN32
#    define CUDAAPI          __stdcall
#    define LOAD_FUNC(l, s)  GetProcAddress(l, s)
#    define DL_CLOSE_FUNC(l) FreeLibrary(l)
#else
#    define CUDAAPI
#    define LOAD_FUNC(l, s)  dlsym(l, s)
#    define DL_CLOSE_FUNC(l) dlclose(l)
#endif
|
|
|
|
|
|
|
|
/*
 * Local declarations of the few CUDA driver types this file needs for the
 * dynamically resolved entry points.
 */

/* Result code of a CUDA driver call; only the success value is declared. */
typedef enum cudaError_enum {
    CUDA_SUCCESS = 0
} CUresult;

/* Device handle as filled in by the cuDeviceGet entry point. */
typedef int CUdevice;

/* Opaque context handle. */
typedef void *CUcontext;
|
|
|
|
|
|
|
|
/*
 * Signatures of the CUDA driver entry points that are looked up by name at
 * runtime (cuInit, cuDeviceGetCount, cuDeviceGet, cuDeviceGetName,
 * cuDeviceComputeCapability, cuCtxCreate_v2, cuCtxPopCurrent_v2,
 * cuCtxDestroy_v2).
 */
typedef CUresult (CUDAAPI *PCUINIT)(unsigned int flags);
typedef CUresult (CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
typedef CUresult (CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
typedef CUresult (CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
typedef CUresult (CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
typedef CUresult (CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
typedef CUresult (CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
typedef CUresult (CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
|
|
|
|
|
|
|
|
/* Signature of the "NvEncodeAPICreateInstance" entry point, which fills in the NVENC function table. */
typedef NVENCSTATUS (NVENCAPI *PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *function_list);
|
|
|
|
|
|
|
|
typedef struct NvencInputSurface
|
|
|
|
{
|
|
|
|
NV_ENC_INPUT_PTR input_surface;
|
|
|
|
int width;
|
|
|
|
int height;
|
|
|
|
|
|
|
|
int lockCount;
|
|
|
|
|
|
|
|
NV_ENC_BUFFER_FORMAT format;
|
|
|
|
} NvencInputSurface;
|
|
|
|
|
|
|
|
typedef struct NvencOutputSurface
|
|
|
|
{
|
|
|
|
NV_ENC_OUTPUT_PTR output_surface;
|
|
|
|
int size;
|
|
|
|
|
|
|
|
NvencInputSurface* input_surface;
|
|
|
|
|
|
|
|
int busy;
|
|
|
|
} NvencOutputSurface;
|
|
|
|
|
|
|
|
typedef struct NvencData
|
|
|
|
{
|
|
|
|
union {
|
|
|
|
int64_t timestamp;
|
|
|
|
NvencOutputSurface *surface;
|
2015-07-14 22:58:54 +02:00
|
|
|
} u;
|
2014-11-30 00:04:37 +01:00
|
|
|
} NvencData;
|
|
|
|
|
|
|
|
typedef struct NvencDataList
|
|
|
|
{
|
|
|
|
NvencData* data;
|
|
|
|
|
|
|
|
uint32_t pos;
|
|
|
|
uint32_t count;
|
|
|
|
uint32_t size;
|
|
|
|
} NvencDataList;
|
|
|
|
|
|
|
|
typedef struct NvencDynLoadFunctions
|
|
|
|
{
|
|
|
|
PCUINIT cu_init;
|
|
|
|
PCUDEVICEGETCOUNT cu_device_get_count;
|
|
|
|
PCUDEVICEGET cu_device_get;
|
|
|
|
PCUDEVICEGETNAME cu_device_get_name;
|
|
|
|
PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability;
|
|
|
|
PCUCTXCREATE cu_ctx_create;
|
|
|
|
PCUCTXPOPCURRENT cu_ctx_pop_current;
|
|
|
|
PCUCTXDESTROY cu_ctx_destroy;
|
|
|
|
|
|
|
|
NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
|
|
|
|
int nvenc_device_count;
|
|
|
|
CUdevice nvenc_devices[16];
|
|
|
|
|
2014-12-25 14:55:31 +01:00
|
|
|
#if defined(_WIN32)
|
2014-11-30 00:04:37 +01:00
|
|
|
HMODULE cuda_lib;
|
|
|
|
HMODULE nvenc_lib;
|
|
|
|
#else
|
|
|
|
void* cuda_lib;
|
|
|
|
void* nvenc_lib;
|
|
|
|
#endif
|
|
|
|
} NvencDynLoadFunctions;
|
|
|
|
|
2015-04-04 13:34:14 +02:00
|
|
|
/* One row of a string -> number lookup table; tables end with a row whose str is NULL. */
typedef struct NvencValuePair {
    const char *str; /* option spelling accepted from the user */
    uint32_t num;    /* value the spelling maps to */
} NvencValuePair;
|
|
|
|
|
2014-11-30 00:04:37 +01:00
|
|
|
typedef struct NvencContext
|
|
|
|
{
|
|
|
|
AVClass *avclass;
|
|
|
|
|
|
|
|
NvencDynLoadFunctions nvenc_dload_funcs;
|
|
|
|
|
|
|
|
NV_ENC_INITIALIZE_PARAMS init_encode_params;
|
|
|
|
NV_ENC_CONFIG encode_config;
|
|
|
|
CUcontext cu_context;
|
|
|
|
|
|
|
|
int max_surface_count;
|
|
|
|
NvencInputSurface *input_surfaces;
|
|
|
|
NvencOutputSurface *output_surfaces;
|
|
|
|
|
|
|
|
NvencDataList output_surface_queue;
|
|
|
|
NvencDataList output_surface_ready_queue;
|
|
|
|
NvencDataList timestamp_list;
|
|
|
|
int64_t last_dts;
|
|
|
|
|
|
|
|
void *nvencoder;
|
|
|
|
|
|
|
|
char *preset;
|
2015-04-02 00:04:07 +02:00
|
|
|
char *profile;
|
2015-04-04 13:34:14 +02:00
|
|
|
char *level;
|
|
|
|
char *tier;
|
2014-11-30 00:04:37 +01:00
|
|
|
int cbr;
|
|
|
|
int twopass;
|
|
|
|
int gpu;
|
2015-07-25 23:20:28 +02:00
|
|
|
int buffer_delay;
|
2014-11-30 00:04:37 +01:00
|
|
|
} NvencContext;
|
|
|
|
|
2015-04-04 13:34:14 +02:00
|
|
|
static const NvencValuePair nvenc_h264_level_pairs[] = {
|
|
|
|
{ "auto", NV_ENC_LEVEL_AUTOSELECT },
|
|
|
|
{ "1" , NV_ENC_LEVEL_H264_1 },
|
|
|
|
{ "1.0" , NV_ENC_LEVEL_H264_1 },
|
|
|
|
{ "1b" , NV_ENC_LEVEL_H264_1b },
|
|
|
|
{ "1.0b", NV_ENC_LEVEL_H264_1b },
|
|
|
|
{ "1.1" , NV_ENC_LEVEL_H264_11 },
|
|
|
|
{ "1.2" , NV_ENC_LEVEL_H264_12 },
|
|
|
|
{ "1.3" , NV_ENC_LEVEL_H264_13 },
|
|
|
|
{ "2" , NV_ENC_LEVEL_H264_2 },
|
|
|
|
{ "2.0" , NV_ENC_LEVEL_H264_2 },
|
|
|
|
{ "2.1" , NV_ENC_LEVEL_H264_21 },
|
|
|
|
{ "2.2" , NV_ENC_LEVEL_H264_22 },
|
|
|
|
{ "3" , NV_ENC_LEVEL_H264_3 },
|
|
|
|
{ "3.0" , NV_ENC_LEVEL_H264_3 },
|
|
|
|
{ "3.1" , NV_ENC_LEVEL_H264_31 },
|
|
|
|
{ "3.2" , NV_ENC_LEVEL_H264_32 },
|
|
|
|
{ "4" , NV_ENC_LEVEL_H264_4 },
|
|
|
|
{ "4.0" , NV_ENC_LEVEL_H264_4 },
|
|
|
|
{ "4.1" , NV_ENC_LEVEL_H264_41 },
|
|
|
|
{ "4.2" , NV_ENC_LEVEL_H264_42 },
|
|
|
|
{ "5" , NV_ENC_LEVEL_H264_5 },
|
|
|
|
{ "5.0" , NV_ENC_LEVEL_H264_5 },
|
|
|
|
{ "5.1" , NV_ENC_LEVEL_H264_51 },
|
|
|
|
{ NULL }
|
|
|
|
};
|
|
|
|
|
2015-06-06 20:00:45 +02:00
|
|
|
static const NvencValuePair nvenc_hevc_level_pairs[] = {
|
2015-04-04 13:34:14 +02:00
|
|
|
{ "auto", NV_ENC_LEVEL_AUTOSELECT },
|
|
|
|
{ "1" , NV_ENC_LEVEL_HEVC_1 },
|
|
|
|
{ "1.0" , NV_ENC_LEVEL_HEVC_1 },
|
|
|
|
{ "2" , NV_ENC_LEVEL_HEVC_2 },
|
|
|
|
{ "2.0" , NV_ENC_LEVEL_HEVC_2 },
|
|
|
|
{ "2.1" , NV_ENC_LEVEL_HEVC_21 },
|
|
|
|
{ "3" , NV_ENC_LEVEL_HEVC_3 },
|
|
|
|
{ "3.0" , NV_ENC_LEVEL_HEVC_3 },
|
|
|
|
{ "3.1" , NV_ENC_LEVEL_HEVC_31 },
|
|
|
|
{ "4" , NV_ENC_LEVEL_HEVC_4 },
|
|
|
|
{ "4.0" , NV_ENC_LEVEL_HEVC_4 },
|
|
|
|
{ "4.1" , NV_ENC_LEVEL_HEVC_41 },
|
|
|
|
{ "5" , NV_ENC_LEVEL_HEVC_5 },
|
|
|
|
{ "5.0" , NV_ENC_LEVEL_HEVC_5 },
|
|
|
|
{ "5.1" , NV_ENC_LEVEL_HEVC_51 },
|
|
|
|
{ "5.2" , NV_ENC_LEVEL_HEVC_52 },
|
|
|
|
{ "6" , NV_ENC_LEVEL_HEVC_6 },
|
|
|
|
{ "6.0" , NV_ENC_LEVEL_HEVC_6 },
|
|
|
|
{ "6.1" , NV_ENC_LEVEL_HEVC_61 },
|
|
|
|
{ "6.2" , NV_ENC_LEVEL_HEVC_62 },
|
|
|
|
{ NULL }
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Look up a string in a NULL-terminated NvencValuePair table.
 *
 * @param avctx  codec context (not used by the lookup itself)
 * @param pair   first entry of the table; the table ends at an entry whose str is NULL
 * @param input  string to search for (exact, case-sensitive match)
 * @param output receives the number of the first matching entry; untouched when nothing matches
 * @return 0 on a match, AVERROR(EINVAL) otherwise
 */
static int input_string_to_uint32(AVCodecContext *avctx, const NvencValuePair *pair, const char *input, uint32_t *output)
{
    const NvencValuePair *entry = pair;

    while (entry->str) {
        if (strcmp(input, entry->str) == 0) {
            *output = entry->num;
            return 0;
        }
        entry++;
    }

    return AVERROR(EINVAL);
}
|
|
|
|
|
2014-11-30 00:04:37 +01:00
|
|
|
/**
 * Remove the oldest element from the ring buffer.
 *
 * The queue must be non-NULL and already allocated (asserted).
 *
 * @return pointer to the removed slot inside the queue's own storage, or
 *         NULL when the queue is empty. The slot is not copied, so its
 *         content is only meaningful until the slot is written again or the
 *         storage is replaced by a later enqueue.
 */
static NvencData* data_queue_dequeue(NvencDataList* queue)
{
    uint32_t oldest;

    av_assert0(queue);
    av_assert0(queue->size);
    av_assert0(queue->data);

    if (queue->count == 0)
        return NULL;

    /*
     * The capacity is a power of two, so masking with (size - 1) wraps the
     * index; the unsigned subtraction may wrap around before the mask is applied.
     */
    oldest = (queue->pos - queue->count) & (queue->size - 1);
    --queue->count;

    return queue->data + oldest;
}
|
|
|
|
|
|
|
|
/**
 * Append a copy of *data to the ring buffer, allocating or doubling the
 * storage as needed.
 *
 * @param queue queue to append to; a queue with size == 0 is allocated on first use
 * @param data  element to copy into the queue
 * @return 0 on success, AVERROR(ENOMEM) if the storage could not be allocated
 *         or grown. On a failed growth the queue keeps its previous content.
 */
static int data_queue_enqueue(NvencDataList* queue, NvencData *data)
{
    NvencDataList new_queue;
    NvencData* tmp_data;
    uint32_t mask;

    if (!queue->size) {
        /* the capacity always has to be a power of two so (size - 1) works as an index mask */
        queue->size = 4;
        queue->pos = 0;
        queue->count = 0;

        queue->data = av_malloc_array(queue->size, sizeof(*queue->data));

        if (!queue->data) {
            queue->size = 0;
            return AVERROR(ENOMEM);
        }
    }

    if (queue->count == queue->size) {
        /* doubling must not wrap the 32 bit capacity */
        if (queue->size > UINT32_MAX / 2)
            return AVERROR(ENOMEM);

        new_queue.size = queue->size << 1;
        new_queue.pos = 0;
        new_queue.count = 0;
        /* av_malloc_array() rejects element counts whose byte size would overflow */
        new_queue.data = av_malloc_array(new_queue.size, sizeof(*new_queue.data));

        if (!new_queue.data)
            return AVERROR(ENOMEM);

        /*
         * Move the elements over in FIFO order. The new queue has spare
         * capacity, so these nested enqueues neither allocate nor fail.
         */
        while ((tmp_data = data_queue_dequeue(queue)))
            data_queue_enqueue(&new_queue, tmp_data);

        av_free(queue->data);
        *queue = new_queue;
    }

    mask = queue->size - 1;

    queue->data[queue->pos] = *data;
    queue->pos = (queue->pos + 1) & mask;
    queue->count++;

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Queue an output surface pointer.
 *
 * @return the result of data_queue_enqueue(): 0 on success, a negative
 *         AVERROR code on allocation failure
 */
static int out_surf_queue_enqueue(NvencDataList* queue, NvencOutputSurface* surface)
{
    NvencData entry = { .u = { .surface = surface } };

    return data_queue_enqueue(queue, &entry);
}
|
|
|
|
|
|
|
|
/**
 * Take the oldest output surface pointer off the queue.
 *
 * @return the queued surface, or NULL when the queue is empty
 */
static NvencOutputSurface* out_surf_queue_dequeue(NvencDataList* queue)
{
    NvencData *slot = data_queue_dequeue(queue);

    return slot ? slot->u.surface : NULL;
}
|
|
|
|
|
|
|
|
/**
 * Queue a timestamp.
 *
 * @return the result of data_queue_enqueue(): 0 on success, a negative
 *         AVERROR code on allocation failure
 */
static int timestamp_queue_enqueue(NvencDataList* queue, int64_t timestamp)
{
    NvencData entry = { .u = { .timestamp = timestamp } };

    return data_queue_enqueue(queue, &entry);
}
|
|
|
|
|
|
|
|
/**
 * Take the oldest timestamp off the queue.
 *
 * @return the queued timestamp, or AV_NOPTS_VALUE when the queue is empty
 */
static int64_t timestamp_queue_dequeue(NvencDataList* queue)
{
    NvencData *slot = data_queue_dequeue(queue);

    return slot ? slot->u.timestamp : AV_NOPTS_VALUE;
}
|
|
|
|
|
|
|
|
/*
 * Resolve symbol `s` from the CUDA library into function pointer `f`
 * (cast to type `t`). On failure, log a fatal message and jump to the
 * caller's `error` label.
 *
 * Relies on `dl_fn`, `avctx` and a label named `error` being available at
 * the point of use.
 */
#define CHECK_LOAD_FUNC(t, f, s)                                                       \
    do {                                                                               \
        if (!((f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s))) {                               \
            av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s);   \
            goto error;                                                                \
        }                                                                              \
    } while (0)
|
|
|
|
|
|
|
|
/**
 * Load the CUDA driver library and resolve the entry points used by this
 * encoder. Does nothing when the library is already loaded.
 *
 * @return 1 on success (or when already loaded), 0 on failure. On failure
 *         the library is closed again and the handle reset to NULL.
 */
static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    if (dl_fn->cuda_lib)
        return 1;

#if defined(_WIN32)
    dl_fn->cuda_lib = LoadLibrary(TEXT("nvcuda.dll"));
#else
    /*
     * Prefer the versioned soname, mirroring how the NVENC library is opened
     * in this file; the unversioned name is kept as a fallback because it may
     * not be present on every system.
     */
    dl_fn->cuda_lib = dlopen("libcuda.so.1", RTLD_LAZY);
    if (!dl_fn->cuda_lib)
        dl_fn->cuda_lib = dlopen("libcuda.so", RTLD_LAZY);
#endif

    if (!dl_fn->cuda_lib) {
        av_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
        goto error;
    }

    /* each of these jumps to `error` when the symbol is missing */
    CHECK_LOAD_FUNC(PCUINIT, dl_fn->cu_init, "cuInit");
    CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, dl_fn->cu_device_get_count, "cuDeviceGetCount");
    CHECK_LOAD_FUNC(PCUDEVICEGET, dl_fn->cu_device_get, "cuDeviceGet");
    CHECK_LOAD_FUNC(PCUDEVICEGETNAME, dl_fn->cu_device_get_name, "cuDeviceGetName");
    CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, dl_fn->cu_device_compute_capability, "cuDeviceComputeCapability");
    CHECK_LOAD_FUNC(PCUCTXCREATE, dl_fn->cu_ctx_create, "cuCtxCreate_v2");
    CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, dl_fn->cu_ctx_pop_current, "cuCtxPopCurrent_v2");
    CHECK_LOAD_FUNC(PCUCTXDESTROY, dl_fn->cu_ctx_destroy, "cuCtxDestroy_v2");

    return 1;

error:

    if (dl_fn->cuda_lib)
        DL_CLOSE_FUNC(dl_fn->cuda_lib);

    dl_fn->cuda_lib = NULL;

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Log a failed CUDA driver call.
 *
 * @param avctx logging context
 * @param err   result of the CUDA call
 * @param func  text describing the call, used in the log message
 * @return 1 if err is CUDA_SUCCESS, 0 otherwise (after logging)
 */
static av_cold int check_cuda_errors(AVCodecContext *avctx, CUresult err, const char *func)
{
    if (err != CUDA_SUCCESS) {
        /* %x expects an unsigned int; the enum's underlying type is up to the compiler */
        av_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, (unsigned)err);
        return 0;
    }
    return 1;
}

/*
 * Statement wrapper around the function of the same name: evaluates the CUDA
 * call `f`, and on failure logs the stringified call and jumps to the caller's
 * `error` label. Needs `avctx` and an `error` label in scope. Wrapped in
 * do/while so that it behaves as a single statement next to if/else.
 */
#define check_cuda_errors(f)                        \
    do {                                            \
        if (!check_cuda_errors(avctx, f, #f))       \
            goto error;                             \
    } while (0)
|
|
|
|
|
|
|
|
/**
 * Make sure CUDA is loaded and build the list of devices whose compute
 * capability is high enough for the selected codec.
 *
 * Required capability (major << 4 | minor): 0x30 for H.264, 0x52 for H.264
 * with YUV444P input and for HEVC.
 *
 * The device list is only built once; later calls return early while
 * nvenc_device_count is positive.
 *
 * @return 1 if at least one usable device is known, 0 otherwise (the device
 *         count is reset to 0 on any error)
 */
static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
{
    int device_count = 0;
    CUdevice cu_device = 0;
    char gpu_name[128];
    int smminor = 0, smmajor = 0;
    int i, smver, target_smver;

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    /* capacity of the fixed size device table */
    const int max_devices = (int)(sizeof(dl_fn->nvenc_devices) / sizeof(dl_fn->nvenc_devices[0]));

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        target_smver = avctx->pix_fmt == AV_PIX_FMT_YUV444P ? 0x52 : 0x30;
        break;
    case AV_CODEC_ID_H265:
        target_smver = 0x52;
        break;
    default:
        av_log(avctx, AV_LOG_FATAL, "Unknown codec name\n");
        goto error;
    }

    if (!nvenc_dyload_cuda(avctx))
        return 0;

    if (dl_fn->nvenc_device_count > 0)
        return 1;

    check_cuda_errors(dl_fn->cu_init(0));

    check_cuda_errors(dl_fn->cu_device_get_count(&device_count));

    if (!device_count) {
        av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
        goto error;
    }

    av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", device_count);

    dl_fn->nvenc_device_count = 0;

    for (i = 0; i < device_count; ++i) {
        check_cuda_errors(dl_fn->cu_device_get(&cu_device, i));
        check_cuda_errors(dl_fn->cu_device_get_name(gpu_name, sizeof(gpu_name), cu_device));
        check_cuda_errors(dl_fn->cu_device_compute_capability(&smmajor, &smminor, cu_device));

        smver = (smmajor << 4) | smminor;

        av_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= target_smver) ? "Available" : "Not Available");

        if (smver >= target_smver) {
            /* never write past the end of the device table */
            if (dl_fn->nvenc_device_count >= max_devices) {
                av_log(avctx, AV_LOG_WARNING, "More than %d NVENC capable devices found, ignoring the remaining ones\n", max_devices);
                break;
            }
            dl_fn->nvenc_devices[dl_fn->nvenc_device_count++] = cu_device;
        }
    }

    if (!dl_fn->nvenc_device_count) {
        av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
        goto error;
    }

    return 1;

error:

    dl_fn->nvenc_device_count = 0;

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Load the NVENC library (after making sure a usable CUDA device exists) and
 * obtain its function table through NvEncodeAPICreateInstance.
 *
 * @return 1 on success or when the library is already loaded, 0 on failure.
 *         On failure the NVENC library handle is closed and reset to NULL.
 */
static av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    PNVENCODEAPICREATEINSTANCE create_instance = 0;
    NVENCSTATUS status;

    if (!nvenc_check_cuda(avctx))
        return 0;

    if (dl_fn->nvenc_lib)
        return 1;

#if defined(_WIN32)
    /* the 64 bit build of the library carries a different file name */
    dl_fn->nvenc_lib = LoadLibrary(sizeof(void*) == 8 ? TEXT("nvEncodeAPI64.dll")
                                                      : TEXT("nvEncodeAPI.dll"));
#else
    dl_fn->nvenc_lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
#endif

    if (!dl_fn->nvenc_lib) {
        av_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
        goto fail;
    }

    create_instance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(dl_fn->nvenc_lib, "NvEncodeAPICreateInstance");
    if (!create_instance) {
        av_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
        goto fail;
    }

    /* the function list has to carry its structure version before being filled in */
    dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;

    status = create_instance(&dl_fn->nvenc_funcs);
    if (status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
        goto fail;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");

    return 1;

fail:
    if (dl_fn->nvenc_lib)
        DL_CLOSE_FUNC(dl_fn->nvenc_lib);

    dl_fn->nvenc_lib = NULL;

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Unload the NVENC and CUDA libraries and forget everything obtained from
 * them (device list and CUDA entry points).
 *
 * Safe to call when one or both libraries were never loaded.
 */
static av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    /* only close handles that are actually open, as the loaders' error paths do */
    if (dl_fn->nvenc_lib)
        DL_CLOSE_FUNC(dl_fn->nvenc_lib);
    dl_fn->nvenc_lib = NULL;

    dl_fn->nvenc_device_count = 0;

    if (dl_fn->cuda_lib)
        DL_CLOSE_FUNC(dl_fn->cuda_lib);
    dl_fn->cuda_lib = NULL;

    /* these pointed into the library that was just closed */
    dl_fn->cu_init = NULL;
    dl_fn->cu_device_get_count = NULL;
    dl_fn->cu_device_get = NULL;
    dl_fn->cu_device_get_name = NULL;
    dl_fn->cu_device_compute_capability = NULL;
    dl_fn->cu_ctx_create = NULL;
    dl_fn->cu_ctx_pop_current = NULL;
    dl_fn->cu_ctx_destroy = NULL;

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
}
|
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
/**
 * Create the CUDA context for the GPU selected through ctx->gpu and pop it
 * from the calling thread's context stack again.
 *
 * @return 0 on success, AVERROR(EINVAL) if the requested GPU index is outside
 *         the list of usable devices, AVERROR_EXTERNAL if a CUDA call fails.
 *         When only the pop fails, ctx->cu_context still holds the created
 *         context.
 */
static av_cold int nvenc_setup_device(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    CUresult cu_res;
    CUcontext cu_context_curr;

    /* a negative index would read in front of the device table */
    if (ctx->gpu < 0 || ctx->gpu >= dl_fn->nvenc_device_count) {
        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->gpu, dl_fn->nvenc_device_count);
        return AVERROR(EINVAL);
    }

    ctx->cu_context = NULL;
    cu_res = dl_fn->cu_ctx_create(&ctx->cu_context, 4, dl_fn->nvenc_devices[ctx->gpu]); // CU_CTX_SCHED_BLOCKING_SYNC=4, avoid CPU spins

    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
        return AVERROR_EXTERNAL;
    }

    cu_res = dl_fn->cu_ctx_pop_current(&cu_context_curr);

    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
        return AVERROR_EXTERNAL;
    }

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Open an NVENC encode session on the CUDA context stored in the encoder
 * context. The session handle is written to ctx->nvencoder.
 *
 * @return 0 on success; AVERROR_EXTERNAL on failure, in which case
 *         ctx->nvencoder is reset to NULL
 */
static av_cold int nvenc_open_session(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
    NVENCSTATUS status;

    params.version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
    params.apiVersion = NVENCAPI_VERSION;
    params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
    params.device     = ctx->cu_context;

    status = nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
    if (status == NV_ENC_SUCCESS)
        return 0;

    ctx->nvencoder = NULL;
    av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x\n", (int)status);
    return AVERROR_EXTERNAL;
}
|
|
|
|
|
|
|
|
/**
 * Select constant QP rate control, using avctx->global_quality as the QP for
 * intra, P and B frames alike.
 */
static av_cold void set_constqp(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    rc->rateControlMode  = NV_ENC_PARAMS_RC_CONSTQP;
    rc->constQP.qpIntra  = avctx->global_quality;
    rc->constQP.qpInterP = avctx->global_quality;
    rc->constQP.qpInterB = avctx->global_quality;
}
|
|
|
|
|
|
|
|
/**
 * Enable the minimum and maximum QP limits and set them from avctx->qmin and
 * avctx->qmax for all frame types. The rate control mode itself is not
 * touched here.
 */
static av_cold void set_vbr(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    /* lower bound */
    rc->enableMinQP    = 1;
    rc->minQP.qpIntra  = avctx->qmin;
    rc->minQP.qpInterP = avctx->qmin;
    rc->minQP.qpInterB = avctx->qmin;

    /* upper bound */
    rc->enableMaxQP    = 1;
    rc->maxQP.qpIntra  = avctx->qmax;
    rc->maxQP.qpInterP = avctx->qmax;
    rc->maxQP.qpInterB = avctx->qmax;
}
|
|
|
|
|
|
|
|
/**
 * Select constant QP rate control with a QP of 0 for intra, P and B frames.
 */
static av_cold void set_lossless(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    rc->rateControlMode  = NV_ENC_PARAMS_RC_CONSTQP;
    rc->constQP.qpIntra  = 0;
    rc->constQP.qpInterP = 0;
    rc->constQP.qpInterB = 0;
}
|
|
|
|
|
|
|
|
/**
 * Translate the generic rate control settings of the codec context into the
 * NVENC rate control parameters.
 *
 * Mode selection, in order of precedence:
 *   1. lossless               -> constant QP 0 (plus transform bypass for H.264)
 *   2. ctx->cbr               -> CBR, or 2-pass quality when ctx->twopass is set
 *   3. global_quality > 0     -> constant QP
 *   4. otherwise              -> a VBR variant with an explicit initial QP
 *
 * In cases 1 and 3 avctx->qmin and avctx->qmax are overwritten with -1.
 *
 * @param avctx    codec context; its private data is the NvencContext
 * @param lossless non-zero to configure lossless encoding
 */
static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx, int lossless)
{
    NvencContext *ctx        = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc     = &ctx->encode_config.rcParams;
    NV_ENC_CONFIG_H264 *h264 = &ctx->encode_config.encodeCodecConfig.h264Config;
    NV_ENC_CONFIG_HEVC *hevc = &ctx->encode_config.encodeCodecConfig.hevcConfig;
    int base_qp;

    /* average bitrate: an explicit user value wins, otherwise the value already in the config also becomes the peak */
    if (avctx->bit_rate > 0)
        rc->averageBitRate = avctx->bit_rate;
    else if (rc->averageBitRate > 0)
        rc->maxBitRate = rc->averageBitRate;

    if (avctx->rc_max_rate > 0)
        rc->maxBitRate = avctx->rc_max_rate;

    if (lossless) {
        if (avctx->codec->id == AV_CODEC_ID_H264)
            h264->qpPrimeYZeroTransformBypassFlag = 1;

        set_lossless(avctx);

        avctx->qmin = -1;
        avctx->qmax = -1;
    } else if (ctx->cbr) {
        if (ctx->twopass) {
            rc->rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;

            if (avctx->codec->id == AV_CODEC_ID_H264) {
                h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
                h264->fmoMode               = NV_ENC_H264_FMO_DISABLE;
            }
        } else {
            rc->rateControlMode = NV_ENC_PARAMS_RC_CBR;
        }

        /* request buffering period and picture timing SEI for the codec in use */
        switch (avctx->codec->id) {
        case AV_CODEC_ID_H264:
            h264->outputBufferingPeriodSEI = 1;
            h264->outputPictureTimingSEI   = 1;
            break;
        case AV_CODEC_ID_H265:
            hevc->outputBufferingPeriodSEI = 1;
            hevc->outputPictureTimingSEI   = 1;
            break;
        default:
            break;
        }
    } else if (avctx->global_quality > 0) {
        set_constqp(avctx);

        avctx->qmin = -1;
        avctx->qmax = -1;
    } else {
        if (avctx->qmin < 0 || avctx->qmax < 0) {
            /* no usable QP range given */
            base_qp = 26; // default to 26

            rc->rateControlMode = ctx->twopass ? NV_ENC_PARAMS_RC_2_PASS_VBR
                                               : NV_ENC_PARAMS_RC_VBR;
        } else {
            set_vbr(avctx);

            base_qp = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin

            if (ctx->twopass) {
                rc->rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
                if (avctx->codec->id == AV_CODEC_ID_H264) {
                    h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
                    h264->fmoMode               = NV_ENC_H264_FMO_DISABLE;
                }
            } else {
                rc->rateControlMode = NV_ENC_PARAMS_RC_VBR_MINQP;
            }
        }

        rc->enableInitialRCQP    = 1;
        rc->initialRCQP.qpInterP = base_qp;

        /* derive the intra / B QPs from the P QP only when both quantiser factors are set */
        if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
            rc->initialRCQP.qpIntra = av_clip(
                base_qp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 0, 51);
            rc->initialRCQP.qpInterB = av_clip(
                base_qp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
        } else {
            rc->initialRCQP.qpIntra  = base_qp;
            rc->initialRCQP.qpInterB = base_qp;
        }
    }

    /* VBV buffer: explicit size if given, else twice the average bitrate */
    if (avctx->rc_buffer_size > 0)
        rc->vbvBufferSize = avctx->rc_buffer_size;
    else if (rc->averageBitRate > 0)
        rc->vbvBufferSize = 2 * rc->averageBitRate;
}
|
|
|
|
|
|
|
|
/**
 * Fill in the H.264 specific part of the encoder configuration: VUI colour
 * signalling, slice layout, parameter set placement, profile and level.
 *
 * Profile selection (skipped entirely when lossless is set):
 *   - without a profile option, avctx->profile is mapped to an NVENC profile
 *     GUID, falling back to high for unknown or unsupported values;
 *   - with a profile option, its name selects the GUID and avctx->profile is
 *     updated to match.
 * YUV444P input always forces the high 4:4:4 profile afterwards.
 *
 * @param avctx    codec context; its private data is the NvencContext
 * @param lossless non-zero if lossless encoding was requested
 * @return 0 on success, AVERROR(EINVAL) for an unknown profile or level name
 */
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx, int lossless)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_CONFIG_H264 *h264 = &ctx->encode_config.encodeCodecConfig.h264Config;
    NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
    int res;

    vui->colourMatrix            = avctx->colorspace;
    vui->colourPrimaries         = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    vui->videoFullRangeFlag      = (avctx->color_range == AVCOL_RANGE_JPEG
        || avctx->pix_fmt == AV_PIX_FMT_YUVJ420P || avctx->pix_fmt == AV_PIX_FMT_YUVJ422P || avctx->pix_fmt == AV_PIX_FMT_YUVJ444P);

    /* NOTE(review): 2 is presumably the "unspecified" code point of all three fields - confirm against the AVCOL_*_UNSPECIFIED values */
    vui->colourDescriptionPresentFlag =
        (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);

    /* videoFormat is read, not written, here; it keeps whatever the config already holds */
    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    /* NOTE(review): presumably "one slice per picture" - confirm the meaning of sliceMode 3 in nvEncodeAPI.h */
    h264->sliceMode     = 3;
    h264->sliceModeData = 1;

    /* with global headers the parameter sets are kept out of the stream, otherwise they are repeated in it */
    h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
    h264->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;

    h264->outputAUD = 1;

    if (!lossless) {
        if (!ctx->profile) {
            switch (avctx->profile) {
            case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
                break;
            case FF_PROFILE_H264_BASELINE:
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
                break;
            case FF_PROFILE_H264_MAIN:
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
                break;
            case FF_PROFILE_H264_HIGH:
            case FF_PROFILE_UNKNOWN:
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
                break;
            default:
                av_log(avctx, AV_LOG_WARNING, "Unsupported profile requested, falling back to high\n");
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
                break;
            }
        } else if (!strcmp(ctx->profile, "high")) {
            ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
            avctx->profile = FF_PROFILE_H264_HIGH;
        } else if (!strcmp(ctx->profile, "main")) {
            ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
            avctx->profile = FF_PROFILE_H264_MAIN;
        } else if (!strcmp(ctx->profile, "baseline")) {
            ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
            avctx->profile = FF_PROFILE_H264_BASELINE;
        } else if (!strcmp(ctx->profile, "high444p")) {
            ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
            avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
        } else {
            /* list every name accepted above, including high444p */
            av_log(avctx, AV_LOG_FATAL, "Profile \"%s\" is unknown! Supported profiles: high, main, baseline, high444p\n", ctx->profile);
            return AVERROR(EINVAL);
        }
    }

    // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
    if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
        ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
        avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
    }

    /* chroma format follows the profile: 3 for the 4:4:4 profile, 1 otherwise */
    h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;

    if (ctx->level) {
        res = input_string_to_uint32(avctx, nvenc_h264_level_pairs, ctx->level, &h264->level);

        if (res) {
            av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 1b, 1.1, 1.2, 1.3, 2, 2.1, 2.2, 3, 3.1, 3.2, 4, 4.1, 4.2, 5, 5.1\n", ctx->level);
            return res;
        }
    } else {
        h264->level = NV_ENC_LEVEL_AUTOSELECT;
    }

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Fill in the HEVC specific part of ctx->encode_config.
 *
 * Copies the colour description from the codec context into the VUI
 * parameters, selects per-frame slicing, header/AUD emission, the profile,
 * and finally the user supplied level and tier strings.
 *
 * @param avctx codec context; priv_data must be an NvencContext
 * @return 0 on success, the error from the level lookup, or AVERROR(EINVAL)
 *         for an unknown tier name
 */
static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    const int global_header = !!(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
    int full_range;
    int colour_desc;
    int res;

    /* Full range is signalled either explicitly or through a YUVJ format. */
    full_range = avctx->color_range == AVCOL_RANGE_JPEG ||
                 avctx->pix_fmt == AV_PIX_FMT_YUVJ420P  ||
                 avctx->pix_fmt == AV_PIX_FMT_YUVJ422P  ||
                 avctx->pix_fmt == AV_PIX_FMT_YUVJ444P;

    /* 2 is the "unspecified" code point of all three colour properties. */
    colour_desc = avctx->colorspace      != 2 ||
                  avctx->color_primaries != 2 ||
                  avctx->color_trc       != 2;

    ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourMatrix            = avctx->colorspace;
    ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourPrimaries         = avctx->color_primaries;
    ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.transferCharacteristics = avctx->color_trc;
    ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag      = full_range;
    ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag = colour_desc;

    /* The signal type block is needed as soon as any of its members
     * deviates from its default (video format 5 presumably being the
     * "unspecified" value). */
    ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoSignalTypePresentFlag =
        colour_desc ||
        ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat != 5 ||
        full_range;

    ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode     = 3;
    ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData = 1;

    /* With global headers the parameter sets travel out of band only,
     * otherwise they are repeated in the bitstream. */
    ctx->encode_config.encodeCodecConfig.hevcConfig.disableSPSPPS = global_header;
    ctx->encode_config.encodeCodecConfig.hevcConfig.repeatSPSPPS  = !global_header;

    ctx->encode_config.encodeCodecConfig.hevcConfig.outputAUD = 1;

    /* Main is the only profile offered by SDK version 5. */
    ctx->encode_config.profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
    avctx->profile = FF_PROFILE_HEVC_MAIN;

    if (!ctx->level) {
        ctx->encode_config.encodeCodecConfig.hevcConfig.level = NV_ENC_LEVEL_AUTOSELECT;
    } else {
        res = input_string_to_uint32(avctx, nvenc_hevc_level_pairs, ctx->level,
                                     &ctx->encode_config.encodeCodecConfig.hevcConfig.level);
        if (res) {
            av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2\n", ctx->level);
            return res;
        }
    }

    if (ctx->tier) {
        const int is_main = !strcmp(ctx->tier, "main");

        if (!is_main && strcmp(ctx->tier, "high")) {
            av_log(avctx, AV_LOG_FATAL, "Tier \"%s\" is unknown! Supported tiers: main, high\n", ctx->tier);
            return AVERROR(EINVAL);
        }

        ctx->encode_config.encodeCodecConfig.hevcConfig.tier =
            is_main ? NV_ENC_TIER_HEVC_MAIN : NV_ENC_TIER_HEVC_HIGH;
    }

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Dispatch to the codec specific configuration routine.
 *
 * @param avctx    codec context
 * @param lossless non-zero when a lossless preset was selected; only
 *                 forwarded to the H.264 setup
 * @return result of the codec specific setup, 0 for any other codec id
 */
static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx, int lossless)
{
    if (avctx->codec->id == AV_CODEC_ID_H264)
        return nvenc_setup_h264_config(avctx, lossless);

    if (avctx->codec->id == AV_CODEC_ID_H265)
        return nvenc_setup_hevc_config(avctx);

    /* Unknown codecs are rejected by an earlier switch/case. */
    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Configure and initialize the NVENC encoder session.
 *
 * Translates the preset string into a preset GUID, fetches the matching
 * preset configuration from the driver, overrides it with the settings of
 * the codec context (dimensions, aspect ratio, frame rate, reference
 * frames, GOP structure, rate control, field mode, codec specific options)
 * and finally calls nvEncInitializeEncoder. On success the resulting
 * bitrate and CPB properties are exported back to avctx.
 *
 * @param avctx codec context; priv_data must be an NvencContext with an
 *              open encode session in ctx->nvencoder
 * @return 0 on success, AVERROR(EINVAL) for an unknown preset or codec,
 *         AVERROR_EXTERNAL when an NVENC call fails, AVERROR(ENOMEM) when
 *         the CPB side data cannot be allocated, or the error returned by
 *         the codec specific setup
 */
static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PRESET_CONFIG preset_config = { 0 };
    GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
    GUID codec;
    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
    AVCPBProperties *cpb_props;
    int num_mbs;
    int isLL = 0;
    int lossless = 0;
    int res = 0;
    int dw, dh;

    ctx->last_dts = AV_NOPTS_VALUE;

    ctx->encode_config.version = NV_ENC_CONFIG_VER;
    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;

    /* Map the preset name to a GUID. The x264-like names (slow, medium,
     * fast) additionally force the two-pass setting. */
    if (ctx->preset) {
        if (!strcmp(ctx->preset, "slow")) {
            encoder_preset = NV_ENC_PRESET_HQ_GUID;
            ctx->twopass = 1;
        } else if (!strcmp(ctx->preset, "medium")) {
            encoder_preset = NV_ENC_PRESET_HQ_GUID;
            ctx->twopass = 0;
        } else if (!strcmp(ctx->preset, "fast")) {
            encoder_preset = NV_ENC_PRESET_HP_GUID;
            ctx->twopass = 0;
        } else if (!strcmp(ctx->preset, "hq")) {
            encoder_preset = NV_ENC_PRESET_HQ_GUID;
        } else if (!strcmp(ctx->preset, "hp")) {
            encoder_preset = NV_ENC_PRESET_HP_GUID;
        } else if (!strcmp(ctx->preset, "bd")) {
            encoder_preset = NV_ENC_PRESET_BD_GUID;
        } else if (!strcmp(ctx->preset, "ll")) {
            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
            isLL = 1;
        } else if (!strcmp(ctx->preset, "llhp")) {
            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
            isLL = 1;
        } else if (!strcmp(ctx->preset, "llhq")) {
            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
            isLL = 1;
        } else if (!strcmp(ctx->preset, "lossless")) {
            encoder_preset = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID;
            lossless = 1;
        } else if (!strcmp(ctx->preset, "losslesshp")) {
            encoder_preset = NV_ENC_PRESET_LOSSLESS_HP_GUID;
            lossless = 1;
        } else if (!strcmp(ctx->preset, "default")) {
            encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
        } else {
            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: slow, medium, fast, hp, hq, bd, ll, llhp, llhq, lossless, losslesshp, default\n", ctx->preset);
            return AVERROR(EINVAL);
        }
    }

    /* A negative value means "not set": default to two-pass only for the
     * low latency presets. */
    if (ctx->twopass < 0) {
        ctx->twopass = isLL;
    }

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        codec = NV_ENC_CODEC_H264_GUID;
        break;
    case AV_CODEC_ID_H265:
        codec = NV_ENC_CODEC_HEVC_GUID;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
        return AVERROR(EINVAL);
    }

    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder, codec, encoder_preset, &preset_config);
    if (nv_status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nv_status);
        return AVERROR_EXTERNAL;
    }

    ctx->init_encode_params.encodeGUID = codec;
    ctx->init_encode_params.encodeHeight = avctx->height;
    ctx->init_encode_params.encodeWidth = avctx->width;

    /* Derive the display aspect ratio from the sample aspect ratio. Both
     * numerator and denominator have to be checked: a ratio such as 1:2 is
     * not square even though its numerator is 1. */
    if (avctx->sample_aspect_ratio.num && avctx->sample_aspect_ratio.den &&
        (avctx->sample_aspect_ratio.num != 1 || avctx->sample_aspect_ratio.den != 1)) {
        av_reduce(&dw, &dh,
                  avctx->width * avctx->sample_aspect_ratio.num,
                  avctx->height * avctx->sample_aspect_ratio.den,
                  1024 * 1024);
        ctx->init_encode_params.darHeight = dh;
        ctx->init_encode_params.darWidth = dw;
    } else {
        ctx->init_encode_params.darHeight = avctx->height;
        ctx->init_encode_params.darWidth = avctx->width;
    }

    // De-compensate for hardware, dubiously, trying to compensate for
    // playback at 704 pixel width.
    if (avctx->width == 720 &&
        (avctx->height == 480 || avctx->height == 576)) {
        av_reduce(&dw, &dh,
                  ctx->init_encode_params.darWidth * 44,
                  ctx->init_encode_params.darHeight * 45,
                  1024 * 1024);
        ctx->init_encode_params.darHeight = dh;
        ctx->init_encode_params.darWidth = dw;
    }

    ctx->init_encode_params.frameRateNum = avctx->time_base.den;
    ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;

    /* Use fewer surfaces for large frames; 8160 macroblocks is what a
     * 1920x1088 picture consists of. */
    num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
    ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48;

    /* The output delay must stay below the number of surfaces. */
    if (ctx->buffer_delay >= ctx->max_surface_count)
        ctx->buffer_delay = ctx->max_surface_count - 1;

    ctx->init_encode_params.enableEncodeAsync = 0;
    ctx->init_encode_params.enablePTD = 1;

    ctx->init_encode_params.presetGUID = encoder_preset;

    /* Start from the driver's preset configuration and override it below. */
    ctx->init_encode_params.encodeConfig = &ctx->encode_config;
    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
    ctx->encode_config.version = NV_ENC_CONFIG_VER;

    if (avctx->refs >= 0) {
        /* 0 means "let the hardware decide" */
        switch (avctx->codec->id) {
        case AV_CODEC_ID_H264:
            ctx->encode_config.encodeCodecConfig.h264Config.maxNumRefFrames = avctx->refs;
            break;
        case AV_CODEC_ID_H265:
            ctx->encode_config.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB = avctx->refs;
            break;
        /* Earlier switch/case will return if unknown codec is passed. */
        }
    }

    if (avctx->gop_size > 0) {
        if (avctx->max_b_frames >= 0) {
            /* 0 is intra-only, 1 is I/P only, 2 is one B Frame, 3 two B frames, and so on. */
            ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
        }

        ctx->encode_config.gopLength = avctx->gop_size;
        switch (avctx->codec->id) {
        case AV_CODEC_ID_H264:
            ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
            break;
        case AV_CODEC_ID_H265:
            ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = avctx->gop_size;
            break;
        /* Earlier switch/case will return if unknown codec is passed. */
        }
    } else if (avctx->gop_size == 0) {
        /* Intra-only: every frame is an IDR frame. */
        ctx->encode_config.frameIntervalP = 0;
        ctx->encode_config.gopLength = 1;
        switch (avctx->codec->id) {
        case AV_CODEC_ID_H264:
            ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
            break;
        case AV_CODEC_ID_H265:
            ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = 1;
            break;
        /* Earlier switch/case will return if unknown codec is passed. */
        }
    }

    /* when there're b frames, set dts offset */
    if (ctx->encode_config.frameIntervalP >= 2)
        ctx->last_dts = -2;

    nvenc_setup_rate_control(avctx, lossless);

    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
    } else {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
    }

    res = nvenc_setup_codec_config(avctx, lossless);
    if (res)
        return res;

    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
    if (nv_status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nv_status);
        return AVERROR_EXTERNAL;
    }

    if (ctx->encode_config.frameIntervalP > 1)
        avctx->has_b_frames = 2;

    /* Report the bitrate the encoder was actually configured with. */
    if (ctx->encode_config.rcParams.averageBitRate > 0)
        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;

    cpb_props = ff_add_cpb_side_data(avctx);
    if (!cpb_props)
        return AVERROR(ENOMEM);
    cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
    cpb_props->avg_bitrate = avctx->bit_rate;
    cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;

    return 0;
}
|
2016-03-30 12:03:59 +02:00
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
/**
 * Create the input buffer and the bitstream buffer for surface slot idx.
 *
 * The input buffer dimensions are the frame dimensions rounded up to a
 * multiple of 32. If creating the bitstream buffer fails, the input buffer
 * created just before is destroyed again.
 *
 * @param avctx codec context; priv_data must be an NvencContext
 * @param idx   index into ctx->input_surfaces / ctx->output_surfaces
 * @return 0 on success, AVERROR(EINVAL) for an unsupported pixel format,
 *         AVERROR_EXTERNAL when an NVENC call fails
 */
static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_CREATE_INPUT_BUFFER in_buf = {
        .version    = NV_ENC_CREATE_INPUT_BUFFER_VER,
        .width      = (avctx->width  + 31) & ~31,
        .height     = (avctx->height + 31) & ~31,
        .memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED,
    };
    NV_ENC_CREATE_BITSTREAM_BUFFER out_buf = {
        .version    = NV_ENC_CREATE_BITSTREAM_BUFFER_VER,
        /* 1MB is large enough to hold most output frames.
         * NVENC increases this automatically if it's not enough. */
        .size       = 1024 * 1024,
        .memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED,
    };
    NVENCSTATUS status;

    if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
        in_buf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
    } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
        in_buf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
    } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
        in_buf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
    } else {
        av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
        return AVERROR(EINVAL);
    }

    status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &in_buf);
    if (status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n");
        return AVERROR_EXTERNAL;
    }

    ctx->input_surfaces[idx].lockCount     = 0;
    ctx->input_surfaces[idx].input_surface = in_buf.inputBuffer;
    ctx->input_surfaces[idx].format        = in_buf.bufferFmt;
    ctx->input_surfaces[idx].width         = in_buf.width;
    ctx->input_surfaces[idx].height        = in_buf.height;

    status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &out_buf);
    if (status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n");
        /* Do not leave a half allocated slot behind. */
        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[idx].input_surface);
        return AVERROR_EXTERNAL;
    }

    ctx->output_surfaces[idx].output_surface = out_buf.bitstreamBuffer;
    ctx->output_surfaces[idx].size           = out_buf.size;
    ctx->output_surfaces[idx].busy           = 0;

    return 0;
}
|
2014-11-30 00:04:37 +01:00
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
/**
 * Allocate the surface bookkeeping arrays and create all surfaces.
 *
 * @param avctx        codec context; priv_data must be an NvencContext with
 *                     max_surface_count already set
 * @param surfaceCount receives the number of surfaces that were completely
 *                     created; on failure the caller has to destroy exactly
 *                     that many
 * @return 0 on success, AVERROR(ENOMEM) when an array cannot be allocated,
 *         or the error returned by nvenc_alloc_surface()
 */
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx, int* surfaceCount)
{
    int res;
    NvencContext *ctx = avctx->priv_data;

    /* Nothing has been created yet, whatever happens below. */
    *surfaceCount = 0;

    ctx->input_surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->input_surfaces));
    if (!ctx->input_surfaces)
        return AVERROR(ENOMEM);

    ctx->output_surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->output_surfaces));
    if (!ctx->output_surfaces) {
        /* No surface refers to the input array yet, so release it here
         * instead of leaking it. */
        av_freep(&ctx->input_surfaces);
        return AVERROR(ENOMEM);
    }

    /* The counter is only advanced after a slot was set up successfully,
     * so on error it names the number of slots that need tearing down. */
    for (*surfaceCount = 0; *surfaceCount < ctx->max_surface_count; ++*surfaceCount) {
        res = nvenc_alloc_surface(avctx, *surfaceCount);
        if (res)
            return res;
    }

    return 0;
}
|
2014-11-30 00:04:37 +01:00
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
/**
 * Fetch the sequence parameter sets from the encoder and export them as
 * avctx->extradata.
 *
 * @param avctx codec context; priv_data must be an NvencContext with an
 *              initialized encoder
 * @return 0 on success, AVERROR_EXTERNAL when nvEncGetSequenceParams fails,
 *         AVERROR(ENOMEM) when the extradata buffer cannot be allocated
 */
static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NVENCSTATUS nv_status;
    uint32_t outSize = 0;
    char tmpHeader[256];
    NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
    payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;

    /* The encoder writes the headers into tmpHeader and their size into
     * outSize. */
    payload.spsppsBuffer = tmpHeader;
    payload.inBufferSize = sizeof(tmpHeader);
    payload.outSPSPPSPayloadSize = &outSize;

    nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
    if (nv_status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
        return AVERROR_EXTERNAL;
    }

    avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!avctx->extradata) {
        return AVERROR(ENOMEM);
    }

    /* Publish the size only once the buffer exists, so that a failed
     * allocation never leaves a non-zero size next to a NULL pointer. */
    avctx->extradata_size = outSize;
    memcpy(avctx->extradata, tmpHeader, outSize);

    return 0;
}
|
2014-11-30 00:04:37 +01:00
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
/**
 * Encoder init callback: load the NVENC library, set up the device, open
 * an encode session, configure the encoder, create the surfaces and, if
 * global headers are requested, export the extradata.
 *
 * On any failure after the library was loaded everything created so far is
 * torn down again, including the surface bookkeeping arrays.
 *
 * @param avctx codec context; priv_data must be an NvencContext
 * @return 0 on success, AVERROR_EXTERNAL when the library cannot be loaded,
 *         otherwise the error of the failing setup step
 */
static av_cold int nvenc_encode_init(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    int res;
    int i;
    int surfaceCount = 0;

    /* A zero return is treated as failure; nothing needs cleaning up yet. */
    if (!nvenc_dyload_nvenc(avctx))
        return AVERROR_EXTERNAL;

    res = nvenc_setup_device(avctx);
    if (res)
        goto error;

    res = nvenc_open_session(avctx);
    if (res)
        goto error;

    res = nvenc_setup_encoder(avctx);
    if (res)
        goto error;

    res = nvenc_setup_surfaces(avctx, &surfaceCount);
    if (res)
        goto error;

    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
        res = nvenc_setup_extradata(avctx);
        if (res)
            goto error;
    }

    return 0;

error:

    /* Only the first surfaceCount slots were completely created. */
    for (i = 0; i < surfaceCount; ++i) {
        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[i].input_surface);
        if (ctx->output_surfaces[i].output_surface)
            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->output_surfaces[i].output_surface);
    }

    /* Release the bookkeeping arrays as well; av_freep() accepts pointers
     * that are still NULL and resets them afterwards. */
    av_freep(&ctx->input_surfaces);
    av_freep(&ctx->output_surfaces);

    if (ctx->nvencoder)
        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);

    if (ctx->cu_context)
        dl_fn->cu_ctx_destroy(ctx->cu_context);

    nvenc_unload_nvenc(avctx);

    ctx->nvencoder = NULL;
    ctx->cu_context = NULL;

    return res;
}
|
|
|
|
|
|
|
|
/**
 * Encoder close callback: release the queues, all surfaces, the encoder
 * session, the CUDA context and the dynamically loaded library.
 *
 * @param avctx codec context; priv_data must be an NvencContext that was
 *              successfully initialized
 * @return always 0
 */
static av_cold int nvenc_encode_close(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    int i;

    av_freep(&ctx->timestamp_list.data);
    av_freep(&ctx->output_surface_ready_queue.data);
    av_freep(&ctx->output_surface_queue.data);

    for (i = 0; i < ctx->max_surface_count; ++i) {
        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[i].input_surface);
        p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->output_surfaces[i].output_surface);
    }
    ctx->max_surface_count = 0;

    /* The arrays describing the surfaces are heap allocated too and have
     * to go once the surfaces themselves are destroyed. */
    av_freep(&ctx->input_surfaces);
    av_freep(&ctx->output_surfaces);

    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

    dl_fn->cu_ctx_destroy(ctx->cu_context);
    ctx->cu_context = NULL;

    nvenc_unload_nvenc(avctx);

    return 0;
}
|
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
/**
 * Find the first input surface that is not locked and claim it.
 *
 * @param ctx encoder context
 * @return the claimed surface with its lockCount set to 1, or NULL when
 *         every surface is in use
 */
static NvencInputSurface *get_free_frame(NvencContext *ctx)
{
    int idx = 0;

    while (idx < ctx->max_surface_count) {
        if (ctx->input_surfaces[idx].lockCount == 0) {
            ctx->input_surfaces[idx].lockCount = 1;
            return ctx->input_surfaces + idx;
        }
        idx++;
    }

    return NULL;
}
|
|
|
|
|
|
|
|
static int nvenc_copy_frame(AVCodecContext *avctx, NvencInputSurface *inSurf,
|
|
|
|
NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
|
|
|
|
{
|
|
|
|
uint8_t *buf = lockBufferParams->bufferDataPtr;
|
|
|
|
int off = inSurf->height * lockBufferParams->pitch;
|
|
|
|
|
|
|
|
if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch,
|
|
|
|
frame->data[0], frame->linesize[0],
|
|
|
|
avctx->width, avctx->height);
|
|
|
|
|
|
|
|
buf += off;
|
|
|
|
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
|
|
|
|
frame->data[2], frame->linesize[2],
|
|
|
|
avctx->width >> 1, avctx->height >> 1);
|
|
|
|
|
|
|
|
buf += off >> 2;
|
|
|
|
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
|
|
|
|
frame->data[1], frame->linesize[1],
|
|
|
|
avctx->width >> 1, avctx->height >> 1);
|
|
|
|
} else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch,
|
|
|
|
frame->data[0], frame->linesize[0],
|
|
|
|
avctx->width, avctx->height);
|
|
|
|
|
|
|
|
buf += off;
|
|
|
|
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch,
|
|
|
|
frame->data[1], frame->linesize[1],
|
|
|
|
avctx->width, avctx->height >> 1);
|
|
|
|
} else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch,
|
|
|
|
frame->data[0], frame->linesize[0],
|
|
|
|
avctx->width, avctx->height);
|
|
|
|
|
|
|
|
buf += off;
|
|
|
|
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch,
|
|
|
|
frame->data[1], frame->linesize[1],
|
|
|
|
avctx->width, avctx->height);
|
|
|
|
|
|
|
|
buf += off;
|
|
|
|
|
|
|
|
av_image_copy_plane(buf, lockBufferParams->pitch,
|
|
|
|
frame->data[2], frame->linesize[2],
|
|
|
|
avctx->width, avctx->height);
|
|
|
|
} else {
|
|
|
|
av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
|
|
|
|
return AVERROR(EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
|
|
|
|
NvencInputSurface *nvenc_frame)
|
|
|
|
{
|
|
|
|
NvencContext *ctx = avctx->priv_data;
|
|
|
|
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
|
|
|
|
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
|
|
|
|
|
|
|
|
int res;
|
|
|
|
NVENCSTATUS nv_status;
|
|
|
|
NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
|
|
|
|
|
|
|
|
lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
|
|
|
|
lockBufferParams.inputBuffer = nvenc_frame->input_surface;
|
|
|
|
|
|
|
|
nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
|
|
|
|
if (nv_status != NV_ENC_SUCCESS) {
|
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n");
|
|
|
|
return AVERROR_EXTERNAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);
|
|
|
|
|
|
|
|
nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
|
|
|
|
if (nv_status != NV_ENC_SUCCESS) {
|
|
|
|
av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n");
|
|
|
|
return AVERROR_EXTERNAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
|
|
|
|
NV_ENC_PIC_PARAMS *params)
|
|
|
|
{
|
|
|
|
NvencContext *ctx = avctx->priv_data;
|
|
|
|
|
|
|
|
switch (avctx->codec->id) {
|
|
|
|
case AV_CODEC_ID_H264:
|
|
|
|
params->codecPicParams.h264PicParams.sliceMode = ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
|
|
|
|
params->codecPicParams.h264PicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
|
|
|
|
break;
|
|
|
|
case AV_CODEC_ID_H265:
|
|
|
|
params->codecPicParams.hevcPicParams.sliceMode = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
|
|
|
|
params->codecPicParams.hevcPicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-25 23:26:42 +02:00
|
|
|
/**
 * Lock the bitstream attached to an output surface, copy it into a packet
 * and fill in the packet's flags, encoder statistics and timestamps.
 *
 * On every failure path the scratch slice-offset buffer is released and one
 * entry is removed from ctx->timestamp_list before the error is returned.
 *
 * @param avctx      codec context; its priv_data is the NvencContext
 * @param pkt        packet that receives the encoded data
 * @param tmpoutsurf output surface whose bitstream is to be read
 * @return 0 on success, a negative AVERROR code on failure
 */
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencOutputSurface *tmpoutsurf)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    uint32_t slice_mode_data;
    /* Must start as NULL: the error path frees it unconditionally and can be
     * reached before the allocation below has happened. */
    uint32_t *slice_offsets = NULL;
    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
    NVENCSTATUS nv_status;
    int res = 0;

    enum AVPictureType pict_type;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        break;
    case AV_CODEC_ID_H265:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
        res = AVERROR(EINVAL);
        goto error;
    }

    /* One slice-offset entry per unit of the configured slice mode data. */
    slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
    if (!slice_offsets) {
        /* Take the common error path so this failure is cleaned up the same
         * way as all the others. */
        res = AVERROR(ENOMEM);
        goto error;
    }

    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;

    lock_params.doNotWait = 0;
    lock_params.outputBitstream = tmpoutsurf->output_surface;
    lock_params.sliceOffsets = slice_offsets;

    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
    if (nv_status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n");
        res = AVERROR_EXTERNAL;
        goto error;
    }

    res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
    if (res) {
        /* The bitstream is locked at this point; release it before bailing. */
        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
        goto error;
    }

    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);

    /* A failed unlock is only reported; the data has already been copied. */
    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
    if (nv_status != NV_ENC_SUCCESS)
        av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n");

    switch (lock_params.pictureType) {
    case NV_ENC_PIC_TYPE_IDR:
        pkt->flags |= AV_PKT_FLAG_KEY;
        /* fallthrough: an IDR picture is also reported as an I picture */
    case NV_ENC_PIC_TYPE_I:
        pict_type = AV_PICTURE_TYPE_I;
        break;
    case NV_ENC_PIC_TYPE_P:
        pict_type = AV_PICTURE_TYPE_P;
        break;
    case NV_ENC_PIC_TYPE_B:
        pict_type = AV_PICTURE_TYPE_B;
        break;
    case NV_ENC_PIC_TYPE_BI:
        pict_type = AV_PICTURE_TYPE_BI;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
        res = AVERROR_EXTERNAL;
        goto error;
    }

#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = pict_type;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ff_side_data_set_encoder_stats(pkt,
        (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);

    pkt->pts = lock_params.outputTimeStamp;
    pkt->dts = timestamp_queue_dequeue(&ctx->timestamp_list);

    /* when there're b frame(s), set dts offset */
    if (ctx->encode_config.frameIntervalP >= 2)
        pkt->dts -= 1;

    /* dts may never be later than pts */
    if (pkt->dts > pkt->pts)
        pkt->dts = pkt->pts;

    /* keep dts strictly increasing from one packet to the next */
    if (ctx->last_dts != AV_NOPTS_VALUE && pkt->dts <= ctx->last_dts)
        pkt->dts = ctx->last_dts + 1;

    ctx->last_dts = pkt->dts;

    av_free(slice_offsets);

    return 0;

error:

    av_free(slice_offsets);
    /* Drop the timestamp that would have been used for this packet. */
    timestamp_queue_dequeue(&ctx->timestamp_list);

    return res;
}
|
|
|
|
|
|
|
|
static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
|
|
|
const AVFrame *frame, int *got_packet)
|
|
|
|
{
|
|
|
|
NVENCSTATUS nv_status;
|
|
|
|
NvencOutputSurface *tmpoutsurf;
|
|
|
|
int res, i = 0;
|
|
|
|
|
|
|
|
NvencContext *ctx = avctx->priv_data;
|
|
|
|
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
|
|
|
|
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
|
|
|
|
|
|
|
|
NV_ENC_PIC_PARAMS pic_params = { 0 };
|
|
|
|
pic_params.version = NV_ENC_PIC_PARAMS_VER;
|
|
|
|
|
|
|
|
if (frame) {
|
2016-05-20 16:49:24 +02:00
|
|
|
NvencInputSurface *inSurf;
|
2014-11-30 00:04:37 +01:00
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
inSurf = get_free_frame(ctx);
|
2014-11-30 00:04:37 +01:00
|
|
|
av_assert0(inSurf);
|
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
res = nvenc_upload_frame(avctx, frame, inSurf);
|
|
|
|
if (res) {
|
|
|
|
inSurf->lockCount = 0;
|
|
|
|
return res;
|
2014-11-30 00:04:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ctx->max_surface_count; ++i)
|
|
|
|
if (!ctx->output_surfaces[i].busy)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (i == ctx->max_surface_count) {
|
|
|
|
inSurf->lockCount = 0;
|
|
|
|
av_log(avctx, AV_LOG_FATAL, "No free output surface found!\n");
|
|
|
|
return AVERROR_EXTERNAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx->output_surfaces[i].input_surface = inSurf;
|
|
|
|
|
|
|
|
pic_params.inputBuffer = inSurf->input_surface;
|
|
|
|
pic_params.bufferFmt = inSurf->format;
|
|
|
|
pic_params.inputWidth = avctx->width;
|
|
|
|
pic_params.inputHeight = avctx->height;
|
|
|
|
pic_params.outputBitstream = ctx->output_surfaces[i].output_surface;
|
|
|
|
pic_params.completionEvent = 0;
|
|
|
|
|
2015-06-29 21:59:37 +02:00
|
|
|
if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
|
2014-11-30 00:04:37 +01:00
|
|
|
if (frame->top_field_first) {
|
|
|
|
pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
|
|
|
|
} else {
|
|
|
|
pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
|
|
|
|
}
|
|
|
|
|
|
|
|
pic_params.encodePicFlags = 0;
|
|
|
|
pic_params.inputTimeStamp = frame->pts;
|
|
|
|
pic_params.inputDuration = 0;
|
2016-05-20 16:49:24 +02:00
|
|
|
|
|
|
|
nvenc_codec_specific_pic_params(avctx, &pic_params);
|
2015-01-16 01:02:40 +01:00
|
|
|
|
2014-11-30 00:04:37 +01:00
|
|
|
res = timestamp_queue_enqueue(&ctx->timestamp_list, frame->pts);
|
|
|
|
|
2016-05-20 16:49:24 +02:00
|
|
|
if (res) {
|
|
|
|
inSurf->lockCount = 0;
|
2014-11-30 00:04:37 +01:00
|
|
|
return res;
|
2016-05-20 16:49:24 +02:00
|
|
|
}
|
2014-11-30 00:04:37 +01:00
|
|
|
} else {
|
|
|
|
pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
|
|
|
|
}
|
|
|
|
|
|
|
|
nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
|
|
|
|
|
|
|
|
if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT) {
|
|
|
|
res = out_surf_queue_enqueue(&ctx->output_surface_queue, &ctx->output_surfaces[i]);
|
|
|
|
|
|
|
|
if (res)
|
|
|
|
return res;
|
|
|
|
|
|
|
|
ctx->output_surfaces[i].busy = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
|
|
|
|
av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n");
|
|
|
|
return AVERROR_EXTERNAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
|
|
|
|
while (ctx->output_surface_queue.count) {
|
|
|
|
tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_queue);
|
|
|
|
res = out_surf_queue_enqueue(&ctx->output_surface_ready_queue, tmpoutsurf);
|
|
|
|
|
|
|
|
if (res)
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (frame) {
|
|
|
|
res = out_surf_queue_enqueue(&ctx->output_surface_ready_queue, &ctx->output_surfaces[i]);
|
|
|
|
|
|
|
|
if (res)
|
|
|
|
return res;
|
|
|
|
|
|
|
|
ctx->output_surfaces[i].busy = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-25 23:20:28 +02:00
|
|
|
if (ctx->output_surface_ready_queue.count && (!frame || ctx->output_surface_ready_queue.count + ctx->output_surface_queue.count >= ctx->buffer_delay)) {
|
2014-11-30 00:04:37 +01:00
|
|
|
tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_ready_queue);
|
|
|
|
|
2015-07-25 23:26:42 +02:00
|
|
|
res = process_output_surface(avctx, pkt, tmpoutsurf);
|
2014-11-30 00:04:37 +01:00
|
|
|
|
|
|
|
if (res)
|
|
|
|
return res;
|
|
|
|
|
|
|
|
tmpoutsurf->busy = 0;
|
|
|
|
av_assert0(tmpoutsurf->input_surface->lockCount);
|
|
|
|
tmpoutsurf->input_surface->lockCount--;
|
|
|
|
|
|
|
|
*got_packet = 1;
|
|
|
|
} else {
|
|
|
|
*got_packet = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-02 15:00:35 +02:00
|
|
|
static const enum AVPixelFormat pix_fmts_nvenc[] = {
|
2015-06-07 05:28:22 +02:00
|
|
|
AV_PIX_FMT_YUV420P,
|
2014-11-30 00:04:37 +01:00
|
|
|
AV_PIX_FMT_NV12,
|
2015-07-02 06:09:57 +02:00
|
|
|
AV_PIX_FMT_YUV444P,
|
2014-11-30 00:04:37 +01:00
|
|
|
AV_PIX_FMT_NONE
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OFFSET(x) offsetof(NvencContext, x)
|
|
|
|
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
|
|
|
|
static const AVOption options[] = {
|
2016-04-27 22:22:29 +02:00
|
|
|
{ "preset", "Set the encoding preset (one of slow = hq 2pass, medium = hq, fast = hp, hq, hp, bd, ll, llhq, llhp, lossless, losslesshp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "medium" }, 0, 0, VE },
|
2015-11-09 13:05:02 +01:00
|
|
|
{ "profile", "Set the encoding profile (high, main, baseline or high444p)", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
|
|
|
|
{ "level", "Set the encoding level restriction (auto, 1.0, 1.0b, 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { .str = "auto" }, 0, 0, VE },
|
|
|
|
{ "tier", "Set the encoding tier (main or high)", OFFSET(tier), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
|
2015-09-11 11:07:10 +02:00
|
|
|
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
|
|
|
|
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
|
2014-11-30 00:04:37 +01:00
|
|
|
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
|
2015-07-25 23:20:28 +02:00
|
|
|
{ "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
|
2014-11-30 00:04:37 +01:00
|
|
|
{ NULL }
|
|
|
|
};
|
|
|
|
|
|
|
|
static const AVCodecDefault nvenc_defaults[] = {
|
2015-11-09 13:05:02 +01:00
|
|
|
{ "b", "2M" },
|
2014-11-30 00:04:37 +01:00
|
|
|
{ "qmin", "-1" },
|
|
|
|
{ "qmax", "-1" },
|
|
|
|
{ "qdiff", "-1" },
|
|
|
|
{ "qblur", "-1" },
|
|
|
|
{ "qcomp", "-1" },
|
2015-11-09 13:05:02 +01:00
|
|
|
{ "g", "250" },
|
|
|
|
{ "bf", "0" },
|
2014-11-30 00:04:37 +01:00
|
|
|
{ NULL },
|
|
|
|
};
|
|
|
|
|
2015-03-24 05:34:59 +01:00
|
|
|
#if CONFIG_NVENC_ENCODER
/* AVClass exposing the shared option table under the name "nvenc". */
static const AVClass nvenc_class = {
    .class_name = "nvenc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* H.264 encoder registered under the name "nvenc". */
AVCodec ff_nvenc_encoder = {
    .name           = "nvenc",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .pix_fmts       = pix_fmts_nvenc,
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_class,
    .defaults       = nvenc_defaults,
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
|
|
|
|
|
2015-06-06 20:09:15 +02:00
|
|
|
/* "nvenc_h264": the same H.264 encoder as "nvenc", registered under a second
 * name with its own AVClass. */
#if CONFIG_NVENC_H264_ENCODER
static const AVClass nvenc_h264_class = {
    .class_name = "nvenc_h264",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

AVCodec ff_nvenc_h264_encoder = {
    .name           = "nvenc_h264",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .pix_fmts       = pix_fmts_nvenc,
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_h264_class,
    .defaults       = nvenc_defaults,
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
|
|
|
|
|
2015-06-06 20:00:45 +02:00
|
|
|
#if CONFIG_NVENC_HEVC_ENCODER
/* AVClass exposing the shared option table under the name "nvenc_hevc". */
static const AVClass nvenc_hevc_class = {
    .class_name = "nvenc_hevc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* HEVC encoder; it uses the same callbacks, options, defaults and pixel
 * formats as the H.264 encoders in this file. */
AVCodec ff_nvenc_hevc_encoder = {
    .name           = "nvenc_hevc",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC hevc encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H265,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .pix_fmts       = pix_fmts_nvenc,
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_hevc_class,
    .defaults       = nvenc_defaults,
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
|