Refactored the row based multi-threading code
Modified the code to facilitate bit-match tests in the first pass. Added unit tests to verify the row-based multi-threading behavior for bit-exactness. Change-Id: Ieaf6a8f935bb1075597e0a3b52d9989c8546d7df
This commit is contained in:
parent
61927ba4ac
commit
97d6a4cbd1
@ -40,6 +40,7 @@ class VPxFirstPassEncoderThreadTest
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
new_mt_mode_ = 1;
|
||||
bit_match_mode_ = 0;
|
||||
first_pass_only_ = true;
|
||||
firstpass_stats_.buf = NULL;
|
||||
firstpass_stats_.sz = 0;
|
||||
@ -85,6 +86,8 @@ class VPxFirstPassEncoderThreadTest
|
||||
if (encoding_mode_ == ::libvpx_test::kTwoPassGood)
|
||||
encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_);
|
||||
|
||||
encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, bit_match_mode_);
|
||||
|
||||
encoder_initialized_ = true;
|
||||
}
|
||||
}
|
||||
@ -110,6 +113,7 @@ class VPxFirstPassEncoderThreadTest
|
||||
::libvpx_test::TestMode encoding_mode_;
|
||||
int set_cpu_used_;
|
||||
int new_mt_mode_;
|
||||
int bit_match_mode_;
|
||||
bool first_pass_only_;
|
||||
vpx_fixed_buf_t firstpass_stats_;
|
||||
};
|
||||
@ -144,6 +148,28 @@ static void compare_fp_stats(vpx_fixed_buf_t *fp_stats) {
|
||||
fp_stats->sz = 0;
|
||||
}
|
||||
|
||||
// Checks that the two sets of first pass stats held in fp_stats are
// bit-identical by comparing their MD5 digests.
//
// fp_stats->buf is treated as two equally sized sets of stats stored back to
// back: bytes [0, sz / 2) and bytes [sz / 2, 2 * (sz / 2)). Each half is hashed
// separately and the digests are compared with ASSERT_STREQ. When the digests
// match, the buffer is zeroed and fp_stats->sz is reset to 0. When they do
// not match, ASSERT_STREQ leaves this function before the reset is done.
//
// A NULL buffer is treated as "nothing to compare": only the size is reset.
static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) {
  uint8_t *const first_set = reinterpret_cast<uint8_t *>(fp_stats->buf);

  // Pointer arithmetic and memset() on a null pointer are undefined behavior,
  // so bail out before touching the buffer when none is attached.
  if (first_set == NULL) {
    fp_stats->sz = 0;
    return;
  }

  // Both sets are assumed to have the same size, i.e. half of the total.
  const size_t set_size = fp_stats->sz / 2;
  uint8_t *const second_set = first_set + set_size;

  ::libvpx_test::MD5 first_md5, second_md5;

  first_md5.Add(first_set, set_size);
  const char *first_md5_str = first_md5.Get();

  second_md5.Add(second_set, set_size);
  const char *second_md5_str = second_md5.Get();

  // Check md5 match.
  ASSERT_STREQ(first_md5_str, second_md5_str)
      << "MD5 checksums don't match";

  // Clear the whole buffer and mark it empty.
  memset(first_set, 0, fp_stats->sz);
  fp_stats->sz = 0;
}
|
||||
|
||||
TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
|
||||
|
||||
@ -151,6 +177,7 @@ TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
|
||||
cfg_.rc_target_bitrate = 1000;
|
||||
|
||||
// Test new_mt_mode: 0 vs 1 (threads = 1, tiles_ = 0)
|
||||
bit_match_mode_ = 0;
|
||||
tiles_ = 0;
|
||||
cfg_.g_threads = 1;
|
||||
|
||||
@ -177,6 +204,21 @@ TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
|
||||
|
||||
// Compare to check if single-thread and multi-thread stats matches.
|
||||
compare_fp_stats(&firstpass_stats_);
|
||||
|
||||
// Test new_mt_mode: 0 vs 1 (threads = 8, tiles_ = 2)
|
||||
bit_match_mode_ = 1;
|
||||
tiles_ = 2;
|
||||
cfg_.g_threads = 8;
|
||||
|
||||
new_mt_mode_ = 0;
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
|
||||
new_mt_mode_ = 1;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
|
||||
// Compare to check if stats match with new-mt=0/1.
|
||||
compare_fp_stats_md5(&firstpass_stats_);
|
||||
}
|
||||
|
||||
class VPxEncoderThreadTest
|
||||
@ -191,6 +233,7 @@ class VPxEncoderThreadTest
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
md5_.clear();
|
||||
new_mt_mode_ = 1;
|
||||
bit_match_mode_ = 0;
|
||||
}
|
||||
virtual ~VPxEncoderThreadTest() {}
|
||||
|
||||
@ -229,10 +272,11 @@ class VPxEncoderThreadTest
|
||||
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
|
||||
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
|
||||
|
||||
// While new_mt = 1(namely, using row-based multi-threading), several
|
||||
encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_);
|
||||
// While new_mt = 1/0(with/without row-based multi-threading), several
|
||||
// speed features that would adaptively adjust encoding parameters have
|
||||
// to be disabled to guarantee the bit match of the resulted bitstream.
|
||||
if (new_mt_mode_) encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, 1);
|
||||
encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, bit_match_mode_);
|
||||
} else {
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
|
||||
encoder->Control(VP9E_SET_AQ_MODE, 3);
|
||||
@ -265,15 +309,18 @@ class VPxEncoderThreadTest
|
||||
::libvpx_test::TestMode encoding_mode_;
|
||||
int set_cpu_used_;
|
||||
int new_mt_mode_;
|
||||
int bit_match_mode_;
|
||||
std::vector<std::string> md5_;
|
||||
};
|
||||
|
||||
TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
|
||||
std::vector<std::string> single_thr_md5, multi_thr_md5;
|
||||
std::vector<std::string> single_thr_md5, multi_thr_md5, new_mt_0_md5;
|
||||
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
|
||||
|
||||
cfg_.rc_target_bitrate = 1000;
|
||||
bit_match_mode_ = 1;
|
||||
new_mt_mode_ = 1;
|
||||
|
||||
// Encode using single thread.
|
||||
cfg_.g_threads = 1;
|
||||
@ -290,6 +337,17 @@ TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
|
||||
|
||||
// Compare to check if two vectors are equal.
|
||||
ASSERT_EQ(single_thr_md5, multi_thr_md5);
|
||||
|
||||
// Encode with new-mt 0.
|
||||
new_mt_mode_ = 0;
|
||||
cfg_.g_threads = threads_;
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
new_mt_0_md5 = md5_;
|
||||
md5_.clear();
|
||||
|
||||
// Compare to check if two vectors are equal.
|
||||
ASSERT_EQ(new_mt_0_md5, multi_thr_md5);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
|
@ -1732,12 +1732,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
CHECK_MEM_ERROR(
|
||||
cm, cpi->twopass.fp_mb_float_stats,
|
||||
vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));
|
||||
#endif
|
||||
|
||||
cpi->refresh_alt_ref_frame = 0;
|
||||
cpi->multi_arf_last_grp_enabled = 0;
|
||||
|
||||
@ -2118,11 +2112,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
vpx_free(cpi->twopass.fp_mb_float_stats);
|
||||
cpi->twopass.fp_mb_float_stats = NULL;
|
||||
#endif
|
||||
|
||||
vp9_remove_common(cm);
|
||||
vp9_free_ref_frame_buffers(cm->buffer_pool);
|
||||
#if CONFIG_VP9_POSTPROC
|
||||
|
@ -287,6 +287,9 @@ void vp9_end_first_pass(VP9_COMP *cpi) {
|
||||
} else {
|
||||
output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
|
||||
}
|
||||
|
||||
vpx_free(cpi->twopass.fp_mb_float_stats);
|
||||
cpi->twopass.fp_mb_float_stats = NULL;
|
||||
}
|
||||
|
||||
static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
|
||||
@ -647,7 +650,8 @@ static int fp_estimate_block_noise(MACROBLOCK *x, BLOCK_SIZE bsize) {
|
||||
return block_noise << 2; // Scale << 2 to account for sampling.
|
||||
}
|
||||
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
// This function is called to test the functionality of row based
|
||||
// multi-threading in unit tests for bit-exactness
|
||||
static void accumulate_floating_point_stats(VP9_COMP *cpi,
|
||||
TileDataEnc *first_tile_col) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
@ -667,7 +671,6 @@ static void accumulate_floating_point_stats(VP9_COMP *cpi,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
|
||||
FIRSTPASS_DATA *fp_acc_data) {
|
||||
@ -804,6 +807,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
: NULL;
|
||||
MODE_INFO mi_above, mi_left;
|
||||
|
||||
double mb_intra_factor;
|
||||
double mb_brightness_factor;
|
||||
double mb_neutral_count;
|
||||
|
||||
// First pass code requires valid last and new frame buffers.
|
||||
assert(new_yv12 != NULL);
|
||||
assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
|
||||
@ -861,9 +868,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
|
||||
double log_intra;
|
||||
int level_sample;
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
const int mb_index = mb_row * cm->mb_cols + mb_col;
|
||||
#endif
|
||||
|
||||
#if CONFIG_FP_MB_STATS
|
||||
const int mb_index = mb_row * cm->mb_cols + mb_col;
|
||||
@ -962,16 +967,15 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
vpx_clear_system_state();
|
||||
log_intra = log(this_error + 1.0);
|
||||
if (log_intra < 10.0) {
|
||||
fp_acc_data->intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor =
|
||||
1.0 + ((10.0 - log_intra) * 0.05);
|
||||
#endif
|
||||
mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05);
|
||||
fp_acc_data->intra_factor += mb_intra_factor;
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor =
|
||||
mb_intra_factor;
|
||||
} else {
|
||||
fp_acc_data->intra_factor += 1.0;
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0;
|
||||
#endif
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0;
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
@ -983,17 +987,16 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
level_sample = x->plane[0].src.buf[0];
|
||||
#endif
|
||||
if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) {
|
||||
fp_acc_data->brightness_factor +=
|
||||
1.0 + (0.01 * (DARK_THRESH - level_sample));
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
|
||||
1.0 + (0.01 * (DARK_THRESH - level_sample));
|
||||
#endif
|
||||
mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample));
|
||||
fp_acc_data->brightness_factor += mb_brightness_factor;
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
|
||||
mb_brightness_factor;
|
||||
} else {
|
||||
fp_acc_data->brightness_factor += 1.0;
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = 1.0;
|
||||
#endif
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
|
||||
1.0;
|
||||
}
|
||||
|
||||
// Intrapenalty below deals with situations where the intra and inter
|
||||
@ -1153,19 +1156,19 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
|
||||
if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
|
||||
(this_error < (2 * intrapenalty))) {
|
||||
fp_acc_data->neutral_count += 1.0;
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = 1.0;
|
||||
#endif
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
|
||||
1.0;
|
||||
// Also track cases where the intra is not much worse than the inter
|
||||
// and use this in limiting the GF/arf group length.
|
||||
} else if ((this_error > NCOUNT_INTRA_THRESH) &&
|
||||
(this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
|
||||
fp_acc_data->neutral_count +=
|
||||
mb_neutral_count =
|
||||
(double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
|
||||
(double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
|
||||
#endif
|
||||
fp_acc_data->neutral_count += mb_neutral_count;
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
|
||||
mb_neutral_count;
|
||||
}
|
||||
|
||||
mv.row *= 8;
|
||||
@ -1403,6 +1406,11 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
|
||||
|
||||
cm->log2_tile_rows = 0;
|
||||
|
||||
if (cpi->oxcf.ethread_bit_match && cpi->twopass.fp_mb_float_stats == NULL)
|
||||
CHECK_MEM_ERROR(
|
||||
cm, cpi->twopass.fp_mb_float_stats,
|
||||
vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));
|
||||
|
||||
{
|
||||
FIRSTPASS_STATS fps;
|
||||
TileDataEnc *first_tile_col;
|
||||
@ -1415,15 +1423,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
|
||||
} else {
|
||||
cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
|
||||
cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
cm->log2_tile_cols = 0;
|
||||
vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs);
|
||||
#endif
|
||||
if (cpi->oxcf.ethread_bit_match) {
|
||||
cm->log2_tile_cols = 0;
|
||||
vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs);
|
||||
}
|
||||
vp9_encode_fp_row_mt(cpi);
|
||||
first_tile_col = &cpi->tile_data[0];
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
accumulate_floating_point_stats(cpi, first_tile_col);
|
||||
#endif
|
||||
if (cpi->oxcf.ethread_bit_match)
|
||||
accumulate_floating_point_stats(cpi, first_tile_col);
|
||||
first_pass_stat_calc(cpi, &fps, &(first_tile_col->fp_data));
|
||||
}
|
||||
|
||||
|
@ -41,14 +41,11 @@ typedef struct {
|
||||
|
||||
#define INVALID_ROW -1
|
||||
|
||||
#define ENABLE_MT_BIT_MATCH 0
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
// Per-macroblock copies of the floating point amounts that the first pass
// adds to the frame level intra_factor, brightness_factor and neutral_count
// accumulators. The first pass row encoder fills one entry per macroblock
// (indexed by mb_row * cm->mb_cols + mb_col) only when
// cpi->oxcf.ethread_bit_match is set, and accumulate_floating_point_stats()
// consumes them after the row based multi-threaded first pass has run.
typedef struct {
|
||||
// Amount this macroblock added to the intra_factor accumulator.
double frame_mb_intra_factor;
|
||||
// Amount this macroblock added to the brightness_factor accumulator.
double frame_mb_brightness_factor;
|
||||
// Amount this macroblock added to the neutral_count accumulator.
double frame_mb_neutral_count;
|
||||
} FP_MB_FLOAT_STATS;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
double intra_factor;
|
||||
@ -149,9 +146,7 @@ typedef struct {
|
||||
FIRSTPASS_MB_STATS firstpass_mb_stats;
|
||||
#endif
|
||||
|
||||
#if ENABLE_MT_BIT_MATCH
|
||||
FP_MB_FLOAT_STATS *fp_mb_float_stats;
|
||||
#endif
|
||||
|
||||
// An indication of the content type of the current frame
|
||||
FRAME_CONTENT_TYPE fr_content_type;
|
||||
|
Loading…
Reference in New Issue
Block a user