vp9: Updates to SVC sample encoder.

Allow passing in the layer bitrates on the command line.
Fix to allow passing in bitrate for each spatial-temporal layer.

Change some default values for 1-pass CBR mode:
spatial scale factors and qp-max/min.

Small fixes to some build warnings.

Change-Id: I3f9a776262712480a6570bb863a835b2fc49935a
This commit is contained in:
Marco 2016-10-27 15:38:02 -07:00
parent 4d305dab34
commit a8fdb3926e
3 changed files with 44 additions and 12 deletions

View File

@ -84,6 +84,8 @@ static const arg_def_t speed_arg =
ARG_DEF("sp", "speed", 1, "speed configuration"); ARG_DEF("sp", "speed", 1, "speed configuration");
static const arg_def_t aqmode_arg = static const arg_def_t aqmode_arg =
ARG_DEF("aq", "aqmode", 1, "aq-mode off/on"); ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
// Option definition for the layer bitrates ("bl" / "bitrates"). When matched
// in parse_command_line, its value is appended to the SVC option string as
// "bitrates=<value>". Per the help text, entries are indexed by
// sl * num_tl + tl.
static const arg_def_t bitrates_arg =
ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = { static const struct arg_enum_list bitdepth_enum[] = {
@ -124,6 +126,7 @@ static const arg_def_t *svc_args[] = { &frames_arg,
#endif #endif
&speed_arg, &speed_arg,
&rc_end_usage_arg, &rc_end_usage_arg,
&bitrates_arg,
NULL }; NULL };
static const uint32_t default_frames_to_skip = 0; static const uint32_t default_frames_to_skip = 0;
@ -250,6 +253,9 @@ static void parse_command_line(int argc, const char **argv_,
} else if (arg_match(&arg, &scale_factors_arg, argi)) { } else if (arg_match(&arg, &scale_factors_arg, argi)) {
snprintf(string_options, sizeof(string_options), "%s scale-factors=%s", snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
string_options, arg.val); string_options, arg.val);
} else if (arg_match(&arg, &bitrates_arg, argi)) {
snprintf(string_options, sizeof(string_options), "%s bitrates=%s",
string_options, arg.val);
} else if (arg_match(&arg, &passes_arg, argi)) { } else if (arg_match(&arg, &passes_arg, argi)) {
passes = arg_parse_uint(&arg); passes = arg_parse_uint(&arg);
if (passes < 1 || passes > 2) { if (passes < 1 || passes > 2) {
@ -620,7 +626,7 @@ int main(int argc, const char **argv) {
struct RateControlStats rc; struct RateControlStats rc;
vpx_svc_layer_id_t layer_id; vpx_svc_layer_id_t layer_id;
vpx_svc_ref_frame_config_t ref_frame_config; vpx_svc_ref_frame_config_t ref_frame_config;
int sl, tl; unsigned int sl, tl;
double sum_bitrate = 0.0; double sum_bitrate = 0.0;
double sum_bitrate2 = 0.0; double sum_bitrate2 = 0.0;
double framerate = 30.0; double framerate = 30.0;
@ -695,6 +701,8 @@ int main(int argc, const char **argv) {
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1)); vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1) if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
if (svc_ctx.speed >= 5)
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
// Encode frames // Encode frames
while (!end_of_stream) { while (!end_of_stream) {
@ -730,7 +738,7 @@ int main(int argc, const char **argv) {
&ref_frame_config); &ref_frame_config);
// Keep track of input frames, to account for frame drops in rate control // Keep track of input frames, to account for frame drops in rate control
// stats/metrics. // stats/metrics.
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { for (sl = 0; sl < (unsigned int)enc_cfg.ss_number_layers; ++sl) {
++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
layer_id.temporal_layer_id]; layer_id.temporal_layer_id];
} }
@ -793,7 +801,7 @@ int main(int argc, const char **argv) {
rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
// Keep count of rate control stats per layer, for non-key // Keep count of rate control stats per layer, for non-key
// frames. // frames.
if (tl == layer_id.temporal_layer_id && if (tl == (unsigned int)layer_id.temporal_layer_id &&
!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
rc.layer_avg_rate_mismatch[layer] += rc.layer_avg_rate_mismatch[layer] +=
@ -807,7 +815,7 @@ int main(int argc, const char **argv) {
// Update for short-time encoding bitrate states, for moving // Update for short-time encoding bitrate states, for moving
// window of size rc->window, shifted by rc->window / 2. // window of size rc->window, shifted by rc->window / 2.
// Ignore first window segment, due to key frame. // Ignore first window segment, due to key frame.
if (frame_cnt > rc.window_size) { if (frame_cnt > (unsigned int)rc.window_size) {
tl = layer_id.temporal_layer_id; tl = layer_id.temporal_layer_id;
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
@ -823,13 +831,14 @@ int main(int argc, const char **argv) {
} }
// Second shifted window. // Second shifted window.
if (frame_cnt > rc.window_size + rc.window_size / 2) { if (frame_cnt >
(unsigned int)(rc.window_size + rc.window_size / 2)) {
tl = layer_id.temporal_layer_id; tl = layer_id.temporal_layer_id;
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
} }
if (frame_cnt > 2 * rc.window_size && if (frame_cnt > (unsigned int)(2 * rc.window_size) &&
frame_cnt % rc.window_size == 0) { frame_cnt % rc.window_size == 0) {
rc.window_count += 1; rc.window_count += 1;
rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
@ -842,10 +851,11 @@ int main(int argc, const char **argv) {
} }
#endif #endif
} }
/*
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received, printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY), !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
(int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts); (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
*/
if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1) if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
si->bytes_sum[0] += (int)cx_pkt->data.frame.sz; si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
++frames_received; ++frames_received;

View File

@ -53,6 +53,10 @@ static const int DEFAULT_SCALE_FACTORS_NUM[VPX_SS_MAX_LAYERS] = { 4, 5, 7, 11,
static const int DEFAULT_SCALE_FACTORS_DEN[VPX_SS_MAX_LAYERS] = { 16, 16, 16, static const int DEFAULT_SCALE_FACTORS_DEN[VPX_SS_MAX_LAYERS] = { 16, 16, 16,
16, 16 }; 16, 16 };
// Alternate default scale factors: 1/4, 2/4 and 4/4. Only the first three
// entries are listed; the remaining elements of each array are
// zero-initialized. vpx_svc_init applies these for 1-pass CBR with at most
// three spatial layers, shifting the index by one when there are exactly two
// layers (so 2/4 and 4/4 are used).
static const int DEFAULT_SCALE_FACTORS_NUM_2x[VPX_SS_MAX_LAYERS] = { 1, 2, 4 };
static const int DEFAULT_SCALE_FACTORS_DEN_2x[VPX_SS_MAX_LAYERS] = { 4, 4, 4 };
typedef enum { typedef enum {
QUANTIZER = 0, QUANTIZER = 0,
BITRATE, BITRATE,
@ -156,6 +160,9 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
char *token; char *token;
const char *delim = ","; const char *delim = ",";
char *save_ptr; char *save_ptr;
int num_layers = svc_ctx->spatial_layers;
if (type == BITRATE)
num_layers = svc_ctx->spatial_layers * svc_ctx->temporal_layers;
if (input == NULL || option0 == NULL || if (input == NULL || option0 == NULL ||
(option1 == NULL && type == SCALE_FACTOR)) (option1 == NULL && type == SCALE_FACTOR))
@ -163,7 +170,7 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
input_string = strdup(input); input_string = strdup(input);
token = strtok_r(input_string, delim, &save_ptr); token = strtok_r(input_string, delim, &save_ptr);
for (i = 0; i < svc_ctx->spatial_layers; ++i) { for (i = 0; i < num_layers; ++i) {
if (token != NULL) { if (token != NULL) {
res = extract_option(type, token, option0 + i, option1 + i); res = extract_option(type, token, option0 + i, option1 + i);
if (res != VPX_CODEC_OK) break; if (res != VPX_CODEC_OK) break;
@ -172,7 +179,7 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
break; break;
} }
} }
if (res == VPX_CODEC_OK && i != svc_ctx->spatial_layers) { if (res == VPX_CODEC_OK && i != num_layers) {
svc_log(svc_ctx, SVC_LOG_ERROR, svc_log(svc_ctx, SVC_LOG_ERROR,
"svc: layer params type: %d %d values required, " "svc: layer params type: %d %d values required, "
"but only %d specified\n", "but only %d specified\n",
@ -297,7 +304,9 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
if (si->bitrates[0] != 0) { if (si->bitrates[0] != 0) {
enc_cfg->rc_target_bitrate = 0; enc_cfg->rc_target_bitrate = 0;
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] = 0; enc_cfg->rc_target_bitrate +=
si->bitrates[sl * svc_ctx->temporal_layers +
svc_ctx->temporal_layers - 1];
for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] += enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] +=
(unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl]; (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl];
@ -344,6 +353,7 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
enc_cfg->rc_target_bitrate = 0; enc_cfg->rc_target_bitrate = 0;
for (i = 0; i < svc_ctx->spatial_layers; ++i) { for (i = 0; i < svc_ctx->spatial_layers; ++i) {
enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i];
enc_cfg->layer_target_bitrate[i] = (unsigned int)si->bitrates[i];
enc_cfg->rc_target_bitrate += si->bitrates[i]; enc_cfg->rc_target_bitrate += si->bitrates[i];
} }
} else { } else {
@ -412,12 +422,24 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl]; si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl];
si->svc_params.speed_per_layer[sl] = svc_ctx->speed; si->svc_params.speed_per_layer[sl] = svc_ctx->speed;
} }
if (enc_cfg->rc_end_usage == VPX_CBR && enc_cfg->g_pass == VPX_RC_ONE_PASS &&
svc_ctx->spatial_layers <= 3) {
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
int sl2 = (svc_ctx->spatial_layers == 2) ? sl + 1 : sl;
si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM_2x[sl2];
si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN_2x[sl2];
}
}
for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
i = sl * svc_ctx->temporal_layers + tl; i = sl * svc_ctx->temporal_layers + tl;
si->svc_params.max_quantizers[i] = MAX_QUANTIZER; si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
si->svc_params.min_quantizers[i] = 0; si->svc_params.min_quantizers[i] = 0;
if (enc_cfg->rc_end_usage == VPX_CBR &&
enc_cfg->g_pass == VPX_RC_ONE_PASS) {
si->svc_params.max_quantizers[i] = 56;
si->svc_params.min_quantizers[i] = 2;
}
} }
} }

View File

@ -54,7 +54,7 @@ typedef struct SvcInternal {
// values extracted from option, quantizers // values extracted from option, quantizers
vpx_svc_extra_cfg_t svc_params; vpx_svc_extra_cfg_t svc_params;
int enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
int bitrates[VPX_SS_MAX_LAYERS]; int bitrates[VPX_MAX_LAYERS];
// accumulated statistics // accumulated statistics
double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V