vp9: 1 pass: Move source sad computation into encodeframe loop.
Refactor to split the 1 pass source sad computation into scene detection (currently used for VBR and screen-content mode), and superblock-based source sad computation (used in non-rd CBR mode). This allows the source sad computation for CBR mode to be multi-threaded. No change in compression. Change-Id: I112f2918613ccbd37c1771d852606d3af18c1388
This commit is contained in:
parent
07ad5a15c2
commit
66c6b4d6fc
@ -175,6 +175,10 @@ struct macroblock {
|
||||
|
||||
uint8_t last_sb_high_content;
|
||||
|
||||
// For each superblock: saves the content value (e.g., low/high sad/sumdiff)
|
||||
// based on source sad, prior to encoding the frame.
|
||||
uint8_t content_state_sb;
|
||||
|
||||
// Used to save the status of whether a block has a low variance in
|
||||
// choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
|
||||
// 32x32, 9~24 for 16x16.
|
||||
|
@ -963,6 +963,46 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
|
||||
}
|
||||
}
|
||||
|
||||
// Classifies one 64x64 superblock by comparing the current source frame
// (cpi->Source) against the previous source frame (cpi->Last_Source).
//
//   cpi:       encoder context; supplies both source buffers, the per-block
//              function table (fn_ptr) and the optional per-superblock
//              counter array content_state_sb_fd.
//   x:         macroblock context; x->content_state_sb receives the result.
//   shift:     offset added to both luma buffer pointers to reach the
//              superblock being examined.
//   sb_offset: index of this superblock in cpi->content_state_sb_fd.
//
// Effects:
//   - x->content_state_sb is set to one of kLowSadLowSumdiff,
//     kLowSadHighSumdiff, kHighSadLowSumdiff or kHighSadHighSumdiff.
//   - If cpi->content_state_sb_fd is non-NULL, its entry at sb_offset is
//     incremented (saturating at 255) when tmp_sad is below the second
//     threshold, and reset to 0 otherwise.
//   - When built with CONFIG_VP9_HIGHBITDEPTH and use_highbitdepth is set,
//     the function returns immediately and writes nothing.
static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
|
||||
int sb_offset) {
|
||||
unsigned int tmp_sse;
|
||||
uint64_t tmp_sad;
|
||||
unsigned int tmp_variance;
|
||||
const BLOCK_SIZE bsize = BLOCK_64X64;
|
||||
uint8_t *src_y = cpi->Source->y_buffer;
|
||||
int src_ystride = cpi->Source->y_stride;
|
||||
uint8_t *last_src_y = cpi->Last_Source->y_buffer;
|
||||
int last_src_ystride = cpi->Last_Source->y_stride;
|
||||
// Threshold on tmp_sad separating the "low sad" and "high sad" states.
uint64_t avg_source_sad_threshold = 10000;
|
||||
// Separate threshold on tmp_sad used only for the content_state_sb_fd
// counter update below.
uint64_t avg_source_sad_threshold2 = 12000;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// Skipped entirely for high bitdepth; x->content_state_sb is left untouched.
if (cpi->common.use_highbitdepth) return;
|
||||
#endif
|
||||
// Advance both luma pointers to the superblock of interest.
src_y += shift;
|
||||
last_src_y += shift;
|
||||
// Result of the 64x64 sdf entry of the function table, current vs. last
// source.
tmp_sad =
|
||||
cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);
|
||||
// Variance of the same 64x64 pair; tmp_sse is filled in as an output.
tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,
|
||||
last_src_ystride, &tmp_sse);
|
||||
// Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
|
||||
// Two-way split on tmp_sad, then two-way split on (tmp_sse - tmp_variance)
// against 25, giving the four content states.
if (tmp_sad < avg_source_sad_threshold)
|
||||
x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
|
||||
: kLowSadHighSumdiff;
|
||||
else
|
||||
x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
|
||||
: kHighSadHighSumdiff;
|
||||
// The counter array is optional; update it only when it is present.
if (cpi->content_state_sb_fd != NULL) {
|
||||
if (tmp_sad < avg_source_sad_threshold2) {
|
||||
// Cap the increment to 255.
|
||||
if (cpi->content_state_sb_fd[sb_offset] < 255)
|
||||
cpi->content_state_sb_fd[sb_offset]++;
|
||||
} else {
|
||||
// tmp_sad at or above the second threshold: restart the count.
cpi->content_state_sb_fd[sb_offset] = 0;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// This function chooses partitioning based on the variance between source and
|
||||
// reconstructed last, where variance is computed for down-sampled inputs.
|
||||
static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
|
||||
@ -1011,17 +1051,15 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
|
||||
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
|
||||
segment_id = xd->mi[0]->segment_id;
|
||||
|
||||
if (cpi->sf.use_source_sad && cpi->content_state_sb != NULL &&
|
||||
!is_key_frame) {
|
||||
// The sb_offset2 is to make it consistent with the index in the function
|
||||
// vp9_avg_source_sad() in vp9_ratectrl.c.
|
||||
if (cpi->sf.use_source_sad && !is_key_frame) {
|
||||
int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
|
||||
content_state = cpi->content_state_sb[sb_offset2];
|
||||
content_state = x->content_state_sb;
|
||||
x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
|
||||
content_state == kLowSadHighSumdiff)
|
||||
? 1
|
||||
: 0;
|
||||
x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];
|
||||
if (cpi->content_state_sb_fd != NULL)
|
||||
x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];
|
||||
// If source_sad is low copy the partition without computing the y_sad.
|
||||
if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
|
||||
copy_partitioning(cpi, x, mi_row, mi_col, segment_id, sb_offset)) {
|
||||
@ -4063,6 +4101,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
x->color_sensitivity[1] = 0;
|
||||
x->sb_is_skin = 0;
|
||||
x->skip_low_source_sad = 0;
|
||||
x->content_state_sb = 0;
|
||||
|
||||
if (seg->enabled) {
|
||||
const uint8_t *const map =
|
||||
@ -4074,6 +4113,12 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
}
|
||||
}
|
||||
|
||||
if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
|
||||
int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
|
||||
int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
|
||||
avg_source_sad(cpi, x, shift, sb_offset2);
|
||||
}
|
||||
|
||||
// Set the partition type of the 64X64 block
|
||||
switch (partition_search_type) {
|
||||
case VAR_BASED_PARTITION:
|
||||
|
@ -463,9 +463,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
|
||||
vpx_free(cpi->copied_frame_cnt);
|
||||
cpi->copied_frame_cnt = NULL;
|
||||
|
||||
vpx_free(cpi->content_state_sb);
|
||||
cpi->content_state_sb = NULL;
|
||||
|
||||
vpx_free(cpi->content_state_sb_fd);
|
||||
cpi->content_state_sb_fd = NULL;
|
||||
|
||||
@ -3094,9 +3091,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
|
||||
uint8_t *dest) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
|
||||
int compute_source_sad = cpi->sf.use_source_sad ||
|
||||
cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
|
||||
cpi->oxcf.rc_mode == VPX_VBR;
|
||||
// Flag to check if it's valid to compute the source sad (used for
|
||||
// scene detection and for superblock content state in CBR mode).
|
||||
// The flag may get reset below based on SVC or resizing state.
|
||||
cpi->compute_source_sad_onepass =
|
||||
cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cm->show_frame;
|
||||
|
||||
vpx_clear_system_state();
|
||||
|
||||
@ -3144,16 +3143,13 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
|
||||
if ((cpi->use_svc &&
|
||||
(cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 ||
|
||||
cpi->svc.current_superframe < 1)) ||
|
||||
cpi->resize_pending || cpi->resize_state || cpi->external_resize) {
|
||||
compute_source_sad = 0;
|
||||
if (cpi->content_state_sb != NULL) {
|
||||
memset(cpi->content_state_sb, 0, (cm->mi_stride >> 3) *
|
||||
((cm->mi_rows >> 3) + 1) *
|
||||
sizeof(*cpi->content_state_sb));
|
||||
cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
|
||||
cpi->resize_state != ORIG) {
|
||||
cpi->compute_source_sad_onepass = 0;
|
||||
if (cpi->content_state_sb_fd != NULL)
|
||||
memset(cpi->content_state_sb_fd, 0,
|
||||
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
|
||||
sizeof(*cpi->content_state_sb_fd));
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid scaling last_source unless it's needed.
|
||||
@ -3166,11 +3162,16 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
|
||||
cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
|
||||
cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
|
||||
(cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
|
||||
compute_source_sad))
|
||||
cpi->compute_source_sad_onepass))
|
||||
cpi->Last_Source =
|
||||
vp9_scale_if_required(cm, cpi->unscaled_last_source,
|
||||
&cpi->scaled_last_source, (cpi->oxcf.pass == 0));
|
||||
|
||||
if (cpi->Last_Source == NULL ||
|
||||
cpi->Last_Source->y_width != cpi->Source->y_width ||
|
||||
cpi->Last_Source->y_height != cpi->Source->y_height)
|
||||
cpi->compute_source_sad_onepass = 0;
|
||||
|
||||
if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) {
|
||||
memset(cpi->consec_zero_mv, 0,
|
||||
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
|
||||
@ -3178,15 +3179,13 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
|
||||
|
||||
vp9_update_noise_estimate(cpi);
|
||||
|
||||
// Compute source_sad if the flag compute_source_sad is set, and
|
||||
// only for 1 pass realtime speed >= 5 with show_frame = 1.
|
||||
// TODO(jianj): Look into removing the condition on resize_state,
|
||||
// and improving these conditions (i.e., better handle SVC case and combine
|
||||
// them with condition above in compute_source_sad).
|
||||
if (cpi->oxcf.pass == 0 && cpi->oxcf.mode == REALTIME &&
|
||||
cpi->oxcf.speed >= 5 && cpi->resize_state == ORIG && compute_source_sad &&
|
||||
cm->show_frame)
|
||||
vp9_avg_source_sad(cpi);
|
||||
// Scene detection is used for VBR mode or screen-content case.
|
||||
// Make sure compute_source_sad_onepass is set (which handles SVC case
|
||||
// and dynamic resize).
|
||||
if (cpi->compute_source_sad_onepass &&
|
||||
(cpi->oxcf.rc_mode == VPX_VBR ||
|
||||
cpi->oxcf.content == VP9E_CONTENT_SCREEN))
|
||||
vp9_scene_detection_onepass(cpi);
|
||||
|
||||
// For 1 pass SVC, since only ZEROMV is allowed for upsampled reference
|
||||
// frame (i.e, svc->force_zero_mode_spatial_ref = 0), we can avoid this
|
||||
|
@ -708,13 +708,12 @@ typedef struct VP9_COMP {
|
||||
uint8_t *copied_frame_cnt;
|
||||
uint8_t max_copied_frame;
|
||||
|
||||
// For each superblock: saves the content value (e.g., low/high sad/sumdiff)
|
||||
// based on source sad, prior to encoding the frame.
|
||||
uint8_t *content_state_sb;
|
||||
// For each superblock: keeps track of the last time (in frame distance) the
|
||||
// superblock did not have low source sad.
|
||||
uint8_t *content_state_sb_fd;
|
||||
|
||||
int compute_source_sad_onepass;
|
||||
|
||||
LevelConstraint level_constraint;
|
||||
} VP9_COMP;
|
||||
|
||||
|
@ -2213,7 +2213,7 @@ void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, uint64_t avg_sad_current) {
|
||||
// in content and allow rate control to react.
|
||||
// This function also handles special case of lag_in_frames, to measure content
|
||||
// level in #future frames set by the lag_in_frames.
|
||||
void vp9_avg_source_sad(VP9_COMP *cpi) {
|
||||
void vp9_scene_detection_onepass(VP9_COMP *cpi) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
@ -2284,8 +2284,6 @@ void vp9_avg_source_sad(VP9_COMP *cpi) {
|
||||
int num_samples = 0;
|
||||
int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
|
||||
int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
|
||||
uint64_t avg_source_sad_threshold = 10000;
|
||||
uint64_t avg_source_sad_threshold2 = 12000;
|
||||
if (cpi->oxcf.lag_in_frames > 0) {
|
||||
src_y = frames[frame]->y_buffer;
|
||||
src_ystride = frames[frame]->y_stride;
|
||||
@ -2296,34 +2294,12 @@ void vp9_avg_source_sad(VP9_COMP *cpi) {
|
||||
for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) {
|
||||
// Checker-board pattern, ignore boundary.
|
||||
// If the use_source_sad is on, compute for every superblock.
|
||||
if (cpi->sf.use_source_sad ||
|
||||
((sbi_row > 0 && sbi_col > 0) &&
|
||||
if (((sbi_row > 0 && sbi_col > 0) &&
|
||||
(sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) &&
|
||||
((sbi_row % 2 == 0 && sbi_col % 2 == 0) ||
|
||||
(sbi_row % 2 != 0 && sbi_col % 2 != 0)))) {
|
||||
tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
|
||||
last_src_ystride);
|
||||
if (cpi->sf.use_source_sad && cpi->content_state_sb != NULL) {
|
||||
unsigned int tmp_sse;
|
||||
unsigned int tmp_variance = vpx_variance64x64(
|
||||
src_y, src_ystride, last_src_y, last_src_ystride, &tmp_sse);
|
||||
// Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
|
||||
if (tmp_sad < avg_source_sad_threshold)
|
||||
cpi->content_state_sb[num_samples] =
|
||||
((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
|
||||
: kLowSadHighSumdiff;
|
||||
else
|
||||
cpi->content_state_sb[num_samples] =
|
||||
((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
|
||||
: kHighSadHighSumdiff;
|
||||
if (tmp_sad < avg_source_sad_threshold2) {
|
||||
// Cap the increment to 255.
|
||||
if (cpi->content_state_sb_fd[num_samples] < 255)
|
||||
cpi->content_state_sb_fd[num_samples]++;
|
||||
} else {
|
||||
cpi->content_state_sb_fd[num_samples] = 0;
|
||||
}
|
||||
}
|
||||
avg_sad += tmp_sad;
|
||||
num_samples++;
|
||||
}
|
||||
|
@ -283,7 +283,7 @@ void vp9_set_target_rate(struct VP9_COMP *cpi);
|
||||
|
||||
int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi);
|
||||
|
||||
void vp9_avg_source_sad(struct VP9_COMP *cpi);
|
||||
void vp9_scene_detection_onepass(struct VP9_COMP *cpi);
|
||||
|
||||
int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q);
|
||||
|
||||
|
@ -512,12 +512,9 @@ static void set_rt_speed_feature_framesize_independent(
|
||||
}
|
||||
if (!cpi->external_resize) sf->use_source_sad = 1;
|
||||
if (sf->use_source_sad) {
|
||||
// For SVC allocate for top layer.
|
||||
if (cpi->content_state_sb == NULL &&
|
||||
if (cpi->content_state_sb_fd == NULL &&
|
||||
(!cpi->use_svc ||
|
||||
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
|
||||
cpi->content_state_sb = (uint8_t *)vpx_calloc(
|
||||
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
|
||||
cpi->content_state_sb_fd = (uint8_t *)vpx_calloc(
|
||||
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user