[spatial svc]Multiple frame context feature
We can use one frame context for each layer so that we don't have to reset the probs every frame. But we can't use prev_mi since we may drop enhancement layers. So we have to generate a non-VP9-compatible bitstream and modify it in the player. 1. We need to code all frames as invisible frames so that prev_mi is not used. But in the bitstream we need to code the show_frame flag as 1 so that the publisher will know it's supposed to be a visible frame. 2. In the player we need to change the show_frame flag to 0 for all frames, then add a one-byte frame to the super frame to tell the decoder which layer we want to show. Change-Id: I75b7304cf31f0ab952f043e33c034495e88f01f3
This commit is contained in:
parent
e1b5d24837
commit
d4a407c051
182
test/svc_test.cc
182
test/svc_test.cc
@ -112,7 +112,7 @@ class SvcTest : public ::testing::Test {
|
||||
video.Next();
|
||||
}
|
||||
|
||||
// Flush encoder and test EOS packet
|
||||
// Flush encoder and test EOS packet.
|
||||
res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
|
||||
video.duration(), VPX_DL_GOOD_QUALITY);
|
||||
stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
|
||||
@ -135,7 +135,7 @@ class SvcTest : public ::testing::Test {
|
||||
EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
|
||||
}
|
||||
|
||||
outputs[*frame_received].buf = malloc(frame_size);
|
||||
outputs[*frame_received].buf = malloc(frame_size + 16);
|
||||
ASSERT_TRUE(outputs[*frame_received].buf != NULL);
|
||||
memcpy(outputs[*frame_received].buf, vpx_svc_get_buffer(&svc_),
|
||||
frame_size);
|
||||
@ -176,13 +176,13 @@ class SvcTest : public ::testing::Test {
|
||||
video.Next();
|
||||
}
|
||||
|
||||
// Flush Encoder
|
||||
// Flush encoder.
|
||||
res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
|
||||
video.duration(), VPX_DL_GOOD_QUALITY);
|
||||
EXPECT_EQ(VPX_CODEC_OK, res);
|
||||
StoreFrames(n, outputs, &frame_received);
|
||||
|
||||
EXPECT_EQ(frame_received, (size_t)n);
|
||||
EXPECT_EQ(frame_received, static_cast<size_t>(n));
|
||||
|
||||
ReleaseEncoder();
|
||||
}
|
||||
@ -204,7 +204,7 @@ class SvcTest : public ::testing::Test {
|
||||
++decoded_frames;
|
||||
|
||||
DxDataIterator dec_iter = decoder_->GetDxData();
|
||||
while (dec_iter.Next()) {
|
||||
while (dec_iter.Next() != NULL) {
|
||||
++received_frames;
|
||||
}
|
||||
}
|
||||
@ -214,7 +214,8 @@ class SvcTest : public ::testing::Test {
|
||||
|
||||
void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
|
||||
const int num_super_frames,
|
||||
const int remained_layers) {
|
||||
const int remained_layers,
|
||||
const bool is_multiple_frame_context) {
|
||||
ASSERT_TRUE(inputs != NULL);
|
||||
ASSERT_GT(num_super_frames, 0);
|
||||
ASSERT_GT(remained_layers, 0);
|
||||
@ -236,7 +237,7 @@ class SvcTest : public ::testing::Test {
|
||||
uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);
|
||||
uint8_t *frame_start = frame_data;
|
||||
for (frame = 0; frame < frame_count; ++frame) {
|
||||
// Looking for a visible frame
|
||||
// Looking for a visible frame.
|
||||
if (frame_data[0] & 0x02) {
|
||||
++frames_found;
|
||||
if (frames_found == remained_layers)
|
||||
@ -244,11 +245,17 @@ class SvcTest : public ::testing::Test {
|
||||
}
|
||||
frame_data += frame_sizes[frame];
|
||||
}
|
||||
ASSERT_LT(frame, frame_count);
|
||||
if (frame == frame_count - 1)
|
||||
ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
|
||||
<< "remaining_layers: " << remained_layers
|
||||
<< " super_frame: " << i
|
||||
<< " is_multiple_frame_context: " << is_multiple_frame_context;
|
||||
if (frame == frame_count - 1 && !is_multiple_frame_context)
|
||||
continue;
|
||||
|
||||
frame_data += frame_sizes[frame];
|
||||
// We need to add one more frame for multiple frame context.
|
||||
if (is_multiple_frame_context)
|
||||
++frame;
|
||||
uint8_t marker =
|
||||
static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];
|
||||
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
|
||||
@ -256,11 +263,37 @@ class SvcTest : public ::testing::Test {
|
||||
const size_t new_index_sz = 2 + mag * (frame + 1);
|
||||
marker &= 0x0f8;
|
||||
marker |= frame;
|
||||
|
||||
// Copy existing frame sizes.
|
||||
memmove(frame_data + (is_multiple_frame_context ? 2 : 1),
|
||||
frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
|
||||
if (is_multiple_frame_context) {
|
||||
// Add a one byte frame with flag show_existing frame.
|
||||
*frame_data++ = 0x88 | (remained_layers - 1);
|
||||
}
|
||||
// New marker.
|
||||
frame_data[0] = marker;
|
||||
memcpy(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
|
||||
new_index_sz - 2);
|
||||
frame_data[new_index_sz - 1] = marker;
|
||||
inputs[i].sz = frame_data - frame_start + new_index_sz;
|
||||
frame_data += (mag * (frame + 1) + 1);
|
||||
|
||||
if (is_multiple_frame_context) {
|
||||
// Write the frame size for the one byte frame.
|
||||
frame_data -= mag;
|
||||
*frame_data++ = 1;
|
||||
for (uint32_t j = 1; j < mag; ++j) {
|
||||
*frame_data++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
*frame_data++ = marker;
|
||||
inputs[i].sz = frame_data - frame_start;
|
||||
|
||||
if (is_multiple_frame_context) {
|
||||
// Change the show frame flag to 0 for all frames.
|
||||
for (int j = 0; j < frame; ++j) {
|
||||
frame_start[0] &= ~2;
|
||||
frame_start += frame_sizes[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -507,7 +540,7 @@ TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) {
|
||||
vpx_fixed_buf outputs[10];
|
||||
memset(&outputs[0], 0, sizeof(outputs));
|
||||
Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
|
||||
DropEnhancementLayers(&outputs[0], 10, 1);
|
||||
DropEnhancementLayers(&outputs[0], 10, 1, false);
|
||||
DecodeNFrames(&outputs[0], 10);
|
||||
FreeBitstreamBuffers(&outputs[0], 10);
|
||||
}
|
||||
@ -525,13 +558,13 @@ TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) {
|
||||
Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);
|
||||
|
||||
DecodeNFrames(&outputs[0], 10);
|
||||
DropEnhancementLayers(&outputs[0], 10, 4);
|
||||
DropEnhancementLayers(&outputs[0], 10, 4, false);
|
||||
DecodeNFrames(&outputs[0], 10);
|
||||
DropEnhancementLayers(&outputs[0], 10, 3);
|
||||
DropEnhancementLayers(&outputs[0], 10, 3, false);
|
||||
DecodeNFrames(&outputs[0], 10);
|
||||
DropEnhancementLayers(&outputs[0], 10, 2);
|
||||
DropEnhancementLayers(&outputs[0], 10, 2, false);
|
||||
DecodeNFrames(&outputs[0], 10);
|
||||
DropEnhancementLayers(&outputs[0], 10, 1);
|
||||
DropEnhancementLayers(&outputs[0], 10, 1, false);
|
||||
DecodeNFrames(&outputs[0], 10);
|
||||
|
||||
FreeBitstreamBuffers(&outputs[0], 10);
|
||||
@ -568,12 +601,121 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
|
||||
memset(&outputs[0], 0, sizeof(outputs));
|
||||
Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
|
||||
DecodeNFrames(&outputs[0], 20);
|
||||
DropEnhancementLayers(&outputs[0], 20, 2);
|
||||
DropEnhancementLayers(&outputs[0], 20, 2, false);
|
||||
DecodeNFrames(&outputs[0], 20);
|
||||
DropEnhancementLayers(&outputs[0], 20, 1);
|
||||
DropEnhancementLayers(&outputs[0], 20, 1, false);
|
||||
DecodeNFrames(&outputs[0], 20);
|
||||
|
||||
FreeBitstreamBuffers(&outputs[0], 20);
|
||||
}
|
||||
|
||||
// Checks how the "multi-frame-contexts" option interacts with the number of
// spatial layers configured on the SVC context.
TEST_F(SvcTest, SetMultipleFrameContextOption) {
  // With 5 spatial layers, setting the option itself succeeds, but
  // vpx_svc_init() is expected to reject the configuration.
  svc_.spatial_layers = 5;
  vpx_codec_err_t res =
      vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);

  // With 2 spatial layers, the same option string must be accepted before
  // the encoder is initialized. The result was previously stored in |res|
  // and never inspected.
  svc_.spatial_layers = 2;
  res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
  EXPECT_EQ(VPX_CODEC_OK, res);
  InitializeEncoder();
}
|
||||
|
||||
// Two-pass encode of 10 frames with 2 spatial layers and multiple frame
// contexts enabled, then rewrite each super frame keeping both layers and
// decode the result.
TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) {
  const int kNumFrames = 10;
  const int kNumLayers = 2;

  // Pass 1: gather the first-pass statistics.
  std::string first_pass_stats;
  Pass1EncodeNFrames(kNumFrames, kNumLayers, &first_pass_stats);

  // Pass 2: encode with error resilience off and multiple frame contexts
  // requested through the option string.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");

  vpx_fixed_buf encoded[kNumFrames];
  memset(encoded, 0, sizeof(encoded));
  Pass2EncodeNFrames(&first_pass_stats, kNumFrames, kNumLayers, encoded);

  // Keep every layer; the multiple-frame-context rewrite is still applied.
  DropEnhancementLayers(encoded, kNumFrames, kNumLayers, true);
  DecodeNFrames(encoded, kNumFrames);
  FreeBitstreamBuffers(encoded, kNumFrames);
}
|
||||
|
||||
// Two-pass encode of 10 frames with 2 spatial layers and multiple frame
// contexts enabled, then drop everything but the base layer and decode.
TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) {
  const int kNumFrames = 10;
  const int kNumLayers = 2;
  const int kLayersToKeep = 1;

  // Pass 1: gather the first-pass statistics.
  std::string first_pass_stats;
  Pass1EncodeNFrames(kNumFrames, kNumLayers, &first_pass_stats);

  // Pass 2: encode with error resilience off and multiple frame contexts
  // requested through the option string.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");

  vpx_fixed_buf encoded[kNumFrames];
  memset(encoded, 0, sizeof(encoded));
  Pass2EncodeNFrames(&first_pass_stats, kNumFrames, kNumLayers, encoded);

  // Only the base layer remains in each super frame after this call.
  DropEnhancementLayers(encoded, kNumFrames, kLayersToKeep, true);
  DecodeNFrames(encoded, kNumFrames);
  FreeBitstreamBuffers(encoded, kNumFrames);
}
|
||||
|
||||
// Two-pass encode of 10 frames with 2 layers that share a 1/1 scale factor
// (SNR layers) and multiple frame contexts enabled, then decode with both
// layers kept.
TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) {
  const int kNumFrames = 10;
  const int kNumLayers = 2;

  // Pass 1: the scale factors must be set before the first-pass encode.
  std::string first_pass_stats;
  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");
  Pass1EncodeNFrames(kNumFrames, kNumLayers, &first_pass_stats);

  // Pass 2: same scale factors, plus alt-refs and multiple frame contexts.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1 "
                      "multi-frame-contexts=1");

  vpx_fixed_buf encoded[kNumFrames];
  memset(encoded, 0, sizeof(encoded));
  Pass2EncodeNFrames(&first_pass_stats, kNumFrames, kNumLayers, encoded);

  // Keep every layer; the multiple-frame-context rewrite is still applied.
  DropEnhancementLayers(encoded, kNumFrames, kNumLayers, true);
  DecodeNFrames(encoded, kNumFrames);
  FreeBitstreamBuffers(encoded, kNumFrames);
}
|
||||
|
||||
// Two-pass encode of 10 frames with 3 SNR layers and multiple frame contexts
// enabled, then decode the stream three times keeping 3, 2 and finally 1
// layer. Each decode starts from a fresh copy of the encoder output because
// DropEnhancementLayers() rewrites its input buffers in place.
TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) {
  const int kNumFrames = 10;
  const int kNumLayers = 3;

  // Pass 1: the scale factors must be set before the first-pass encode.
  std::string first_pass_stats;
  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");
  Pass1EncodeNFrames(kNumFrames, kNumLayers, &first_pass_stats);

  // Pass 2: same scale factors, plus alt-refs and multiple frame contexts.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 "
                      "multi-frame-contexts=1");

  vpx_fixed_buf encoded[kNumFrames];
  memset(encoded, 0, sizeof(encoded));
  Pass2EncodeNFrames(&first_pass_stats, kNumFrames, kNumLayers, encoded);

  // Scratch buffers for the per-decode copies. The 16 spare bytes are
  // presumably headroom for what DropEnhancementLayers() appends in
  // multiple-frame-context mode -- TODO confirm the required margin.
  vpx_fixed_buf scratch[kNumFrames];
  for (int i = 0; i < kNumFrames; ++i) {
    scratch[i].buf = malloc(encoded[i].sz + 16);
    ASSERT_TRUE(scratch[i].buf != NULL);
  }

  // Decode with 3, then 2, then 1 remaining layer(s).
  for (int kept_layers = kNumLayers; kept_layers >= 1; --kept_layers) {
    // Restore the pristine encoder output before each rewrite.
    for (int i = 0; i < kNumFrames; ++i) {
      memcpy(scratch[i].buf, encoded[i].buf, encoded[i].sz);
      scratch[i].sz = encoded[i].sz;
    }
    DropEnhancementLayers(scratch, kNumFrames, kept_layers, true);
    DecodeNFrames(scratch, kNumFrames);
  }

  FreeBitstreamBuffers(encoded, kNumFrames);
  FreeBitstreamBuffers(scratch, kNumFrames);
}
|
||||
|
||||
} // namespace
|
||||
|
@ -1081,7 +1081,16 @@ static void write_uncompressed_header(VP9_COMP *cpi,
|
||||
write_bitdepth_colorspace_sampling(cm, wb);
|
||||
write_frame_size(cm, wb);
|
||||
} else {
|
||||
if (!cm->show_frame)
|
||||
// In spatial svc if it's not error_resilient_mode then we need to code all
|
||||
// visible frames as invisible. But we need to keep the show_frame flag so
|
||||
// that the publisher could know whether it is supposed to be visible.
|
||||
// So we will code the show_frame flag as it is. Then code the intra_only
|
||||
// bit here. This will make the bitstream incompatible. In the player we
|
||||
// will change the show_frame flag to 0, then add a one byte frame with
|
||||
// show_existing_frame flag which tells the decoder which frame we want to
|
||||
// show.
|
||||
if (!cm->show_frame ||
|
||||
(is_spatial_svc(cpi) && cm->error_resilient_mode == 0))
|
||||
vp9_wb_write_bit(wb, cm->intra_only);
|
||||
|
||||
if (!cm->error_resilient_mode)
|
||||
|
@ -2102,6 +2102,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
cm->reset_frame_context = 2;
|
||||
}
|
||||
}
|
||||
if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) {
|
||||
cm->frame_context_idx = cpi->svc.spatial_layer_id;
|
||||
|
||||
// The probs will be updated based on the frame type of its previous
|
||||
// frame if frame_parallel_decoding_mode is 0. The type may vary for
|
||||
// the frame after a key frame in base layer since we may drop enhancement
|
||||
// layers. So set frame_parallel_decoding_mode to 1 in this case.
|
||||
if (cpi->svc.spatial_layer_id == 0 &&
|
||||
cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
|
||||
cm->frame_parallel_decoding_mode = 1;
|
||||
else
|
||||
cm->frame_parallel_decoding_mode = 0;
|
||||
}
|
||||
|
||||
// Configure experimental use of segmentation for enhanced coding of
|
||||
// static regions if indicated.
|
||||
@ -2277,8 +2290,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
cm->last_height = cm->height;
|
||||
|
||||
// reset to normal state now that we are done.
|
||||
if (!cm->show_existing_frame)
|
||||
if (!cm->show_existing_frame) {
|
||||
if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)
|
||||
cm->last_show_frame = 0;
|
||||
else
|
||||
cm->last_show_frame = cm->show_frame;
|
||||
}
|
||||
|
||||
if (cm->show_frame) {
|
||||
vp9_swap_mi_and_prev_mi(cm);
|
||||
@ -2289,6 +2306,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
if (cpi->use_svc)
|
||||
vp9_inc_frame_in_layer(&cpi->svc);
|
||||
}
|
||||
|
||||
if (is_spatial_svc(cpi))
|
||||
cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
|
||||
cm->frame_type;
|
||||
}
|
||||
|
||||
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
|
||||
|
@ -36,6 +36,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
|
||||
int i;
|
||||
lc->current_video_frame_in_layer = 0;
|
||||
lc->layer_size = 0;
|
||||
lc->last_frame_type = FRAME_TYPES;
|
||||
lrc->ni_av_qi = oxcf->worst_allowed_q;
|
||||
lrc->total_actual_bits = 0;
|
||||
lrc->total_target_vs_actual = 0;
|
||||
|
@ -28,6 +28,7 @@ typedef struct {
|
||||
struct vpx_fixed_buf rc_twopass_stats_in;
|
||||
unsigned int current_video_frame_in_layer;
|
||||
int is_key_frame;
|
||||
FRAME_TYPE last_frame_type;
|
||||
vpx_svc_parameters_t svc_params_received;
|
||||
struct lookahead_entry *alt_ref_source;
|
||||
int alt_ref_idx;
|
||||
|
@ -188,6 +188,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
|
||||
ERROR("Not enough ref buffers for svc alt ref frames");
|
||||
}
|
||||
if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0)
|
||||
ERROR("Multiple frame contexts are not supported for more than 3 layers");
|
||||
#endif
|
||||
|
||||
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
|
||||
|
@ -86,6 +86,7 @@ typedef struct SvcInternal {
|
||||
int layers;
|
||||
int layer;
|
||||
int is_keyframe;
|
||||
int use_multiple_frame_contexts;
|
||||
|
||||
FrameData *frame_list;
|
||||
FrameData *frame_temp;
|
||||
@ -366,6 +367,7 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
|
||||
char *option_name;
|
||||
char *option_value;
|
||||
char *input_ptr;
|
||||
SvcInternal *const si = get_svc_internal(svc_ctx);
|
||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
||||
|
||||
if (options == NULL) return VPX_CODEC_OK;
|
||||
@ -393,6 +395,8 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
|
||||
} else if (strcmp("auto-alt-refs", option_name) == 0) {
|
||||
res = parse_auto_alt_ref(svc_ctx, option_value);
|
||||
if (res != VPX_CODEC_OK) break;
|
||||
} else if (strcmp("multi-frame-contexts", option_name) == 0) {
|
||||
si->use_multiple_frame_contexts = atoi(option_value);
|
||||
} else {
|
||||
svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
|
||||
res = VPX_CODEC_INVALID_PARAM;
|
||||
@ -401,6 +405,10 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
|
||||
option_name = strtok_r(NULL, "=", &input_ptr);
|
||||
}
|
||||
free(input_string);
|
||||
|
||||
if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3)
|
||||
res = VPX_CODEC_INVALID_PARAM;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -534,6 +542,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
|
||||
enc_cfg->rc_buf_initial_sz = 500;
|
||||
enc_cfg->rc_buf_optimal_sz = 600;
|
||||
enc_cfg->rc_buf_sz = 1000;
|
||||
if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
|
||||
enc_cfg->g_error_resilient = 1;
|
||||
|
||||
// Initialize codec
|
||||
|
Loading…
x
Reference in New Issue
Block a user