diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc index 729f6b423..28a45743c 100644 --- a/test/vp9_ethread_test.cc +++ b/test/vp9_ethread_test.cc @@ -40,6 +40,7 @@ class VPxFirstPassEncoderThreadTest init_flags_ = VPX_CODEC_USE_PSNR; new_mt_mode_ = 1; + bit_match_mode_ = 0; first_pass_only_ = true; firstpass_stats_.buf = NULL; firstpass_stats_.sz = 0; @@ -85,6 +86,8 @@ class VPxFirstPassEncoderThreadTest if (encoding_mode_ == ::libvpx_test::kTwoPassGood) encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_); + encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, bit_match_mode_); + encoder_initialized_ = true; } } @@ -110,6 +113,7 @@ class VPxFirstPassEncoderThreadTest ::libvpx_test::TestMode encoding_mode_; int set_cpu_used_; int new_mt_mode_; + int bit_match_mode_; bool first_pass_only_; vpx_fixed_buf_t firstpass_stats_; }; @@ -144,6 +148,28 @@ static void compare_fp_stats(vpx_fixed_buf_t *fp_stats) { fp_stats->sz = 0; } +static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) { + // fp_stats consists of 2 sets of first pass encoding stats. These 2 sets of + // stats are compared to check if the stats match. + uint8_t *stats1 = reinterpret_cast<uint8_t *>(fp_stats->buf); + uint8_t *stats2 = stats1 + fp_stats->sz / 2; + ::libvpx_test::MD5 md5_new_mt_0, md5_new_mt_1; + + md5_new_mt_0.Add(stats1, fp_stats->sz / 2); + const char *md5_new_mt_0_str = md5_new_mt_0.Get(); + + md5_new_mt_1.Add(stats2, fp_stats->sz / 2); + const char *md5_new_mt_1_str = md5_new_mt_1.Get(); + + // Check md5 match. + ASSERT_STREQ(md5_new_mt_0_str, md5_new_mt_1_str) + << "MD5 checksums don't match"; + + // Reset firstpass_stats_ to 0. 
+ memset((uint8_t *)fp_stats->buf, 0, fp_stats->sz); + fp_stats->sz = 0; +} + TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) { ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); @@ -151,6 +177,7 @@ TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) { cfg_.rc_target_bitrate = 1000; // Test new_mt_mode: 0 vs 1 (threads = 1, tiles_ = 0) + bit_match_mode_ = 0; tiles_ = 0; cfg_.g_threads = 1; @@ -177,6 +204,21 @@ TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) { // Compare to check if single-thread and multi-thread stats matches. compare_fp_stats(&firstpass_stats_); + + // Test new_mt_mode: 0 vs 1 (threads = 8, tiles_ = 2) + bit_match_mode_ = 1; + tiles_ = 2; + cfg_.g_threads = 8; + + new_mt_mode_ = 0; + init_flags_ = VPX_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + new_mt_mode_ = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + // Compare to check if stats match with new-mt=0/1. + compare_fp_stats_md5(&firstpass_stats_); } class VPxEncoderThreadTest @@ -191,6 +233,7 @@ class VPxEncoderThreadTest init_flags_ = VPX_CODEC_USE_PSNR; md5_.clear(); new_mt_mode_ = 1; + bit_match_mode_ = 0; } virtual ~VPxEncoderThreadTest() {} @@ -229,10 +272,11 @@ class VPxEncoderThreadTest encoder->Control(VP8E_SET_ARNR_TYPE, 3); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0); - // While new_mt = 1(namely, using row-based multi-threading), several + encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_); + // While new_mt = 1/0(with/without row-based multi-threading), several // speed features that would adaptively adjust encoding parameters have // to be disabled to guarantee the bit match of the resulted bitstream. 
- if (new_mt_mode_) encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, 1); + encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, bit_match_mode_); } else { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0); encoder->Control(VP9E_SET_AQ_MODE, 3); @@ -265,15 +309,18 @@ class VPxEncoderThreadTest ::libvpx_test::TestMode encoding_mode_; int set_cpu_used_; int new_mt_mode_; + int bit_match_mode_; std::vector<std::string> md5_; }; TEST_P(VPxEncoderThreadTest, EncoderResultTest) { - std::vector<std::string> single_thr_md5, multi_thr_md5; + std::vector<std::string> single_thr_md5, multi_thr_md5, new_mt_0_md5; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20); cfg_.rc_target_bitrate = 1000; + bit_match_mode_ = 1; + new_mt_mode_ = 1; // Encode using single thread. cfg_.g_threads = 1; @@ -290,6 +337,17 @@ TEST_P(VPxEncoderThreadTest, EncoderResultTest) { // Compare to check if two vectors are equal. ASSERT_EQ(single_thr_md5, multi_thr_md5); + + // Encode with new-mt 0. + new_mt_mode_ = 0; + cfg_.g_threads = threads_; + init_flags_ = VPX_CODEC_USE_PSNR; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + new_mt_0_md5 = md5_; + md5_.clear(); + + // Compare to check if two vectors are equal. 
+ ASSERT_EQ(new_mt_0_md5, multi_thr_md5); } INSTANTIATE_TEST_CASE_P( diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index d624d04fd..aa8a27c6d 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1732,12 +1732,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, } #endif -#if ENABLE_MT_BIT_MATCH - CHECK_MEM_ERROR( - cm, cpi->twopass.fp_mb_float_stats, - vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1)); -#endif - cpi->refresh_alt_ref_frame = 0; cpi->multi_arf_last_grp_enabled = 0; @@ -2118,11 +2112,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) { } #endif -#if ENABLE_MT_BIT_MATCH - vpx_free(cpi->twopass.fp_mb_float_stats); - cpi->twopass.fp_mb_float_stats = NULL; -#endif - vp9_remove_common(cm); vp9_free_ref_frame_buffers(cm->buffer_pool); #if CONFIG_VP9_POSTPROC diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 3f722090f..280e3d6a6 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -293,6 +293,9 @@ void vp9_end_first_pass(VP9_COMP *cpi) { } else { output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); } + + vpx_free(cpi->twopass.fp_mb_float_stats); + cpi->twopass.fp_mb_float_stats = NULL; } static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { @@ -653,7 +656,8 @@ static int fp_estimate_block_noise(MACROBLOCK *x, BLOCK_SIZE bsize) { return block_noise << 2; // Scale << 2 to account for sampling. 
} -#if ENABLE_MT_BIT_MATCH +// This function is called to test the functionality of row based +// multi-threading in unit tests for bit-exactness static void accumulate_floating_point_stats(VP9_COMP *cpi, TileDataEnc *first_tile_col) { VP9_COMMON *const cm = &cpi->common; @@ -673,7 +677,6 @@ static void accumulate_floating_point_stats(VP9_COMP *cpi, } } } -#endif static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps, FIRSTPASS_DATA *fp_acc_data) { @@ -814,6 +817,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, : NULL; MODE_INFO mi_above, mi_left; + double mb_intra_factor; + double mb_brightness_factor; + double mb_neutral_count; + // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL)); @@ -871,9 +878,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col); double log_intra; int level_sample; -#if ENABLE_MT_BIT_MATCH const int mb_index = mb_row * cm->mb_cols + mb_col; -#endif #if CONFIG_FP_MB_STATS const int mb_index = mb_row * cm->mb_cols + mb_col; @@ -972,16 +977,15 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, vpx_clear_system_state(); log_intra = log(this_error + 1.0); if (log_intra < 10.0) { - fp_acc_data->intra_factor += 1.0 + ((10.0 - log_intra) * 0.05); -#if ENABLE_MT_BIT_MATCH - cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = - 1.0 + ((10.0 - log_intra) * 0.05); -#endif + mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05); + fp_acc_data->intra_factor += mb_intra_factor; + if (cpi->oxcf.ethread_bit_match) + cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = + mb_intra_factor; } else { fp_acc_data->intra_factor += 1.0; -#if ENABLE_MT_BIT_MATCH - cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0; -#endif + if (cpi->oxcf.ethread_bit_match) + 
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0; } #if CONFIG_VP9_HIGHBITDEPTH @@ -993,17 +997,16 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, level_sample = x->plane[0].src.buf[0]; #endif if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) { - fp_acc_data->brightness_factor += - 1.0 + (0.01 * (DARK_THRESH - level_sample)); -#if ENABLE_MT_BIT_MATCH - cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = - 1.0 + (0.01 * (DARK_THRESH - level_sample)); -#endif + mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample)); + fp_acc_data->brightness_factor += mb_brightness_factor; + if (cpi->oxcf.ethread_bit_match) + cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = + mb_brightness_factor; } else { fp_acc_data->brightness_factor += 1.0; -#if ENABLE_MT_BIT_MATCH - cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = 1.0; -#endif + if (cpi->oxcf.ethread_bit_match) + cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = + 1.0; } // Intrapenalty below deals with situations where the intra and inter @@ -1163,19 +1166,19 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if (((this_error - intrapenalty) * 9 <= motion_error * 10) && (this_error < (2 * intrapenalty))) { fp_acc_data->neutral_count += 1.0; -#if ENABLE_MT_BIT_MATCH - cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = 1.0; -#endif + if (cpi->oxcf.ethread_bit_match) + cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = + 1.0; // Also track cases where the intra is not much worse than the inter // and use this in limiting the GF/arf group length. 
} else if ((this_error > NCOUNT_INTRA_THRESH) && (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) { - fp_acc_data->neutral_count += + mb_neutral_count = (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); -#if ENABLE_MT_BIT_MATCH - cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = - (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); -#endif + fp_acc_data->neutral_count += mb_neutral_count; + if (cpi->oxcf.ethread_bit_match) + cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = + mb_neutral_count; } mv.row *= 8; @@ -1421,6 +1424,11 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { cm->log2_tile_rows = 0; + if (cpi->oxcf.ethread_bit_match && cpi->twopass.fp_mb_float_stats == NULL) + CHECK_MEM_ERROR( + cm, cpi->twopass.fp_mb_float_stats, + vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1)); + { FIRSTPASS_STATS fps; TileDataEnc *first_tile_col; @@ -1433,15 +1441,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { } else { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; -#if ENABLE_MT_BIT_MATCH - cm->log2_tile_cols = 0; - vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs); -#endif + if (cpi->oxcf.ethread_bit_match) { + cm->log2_tile_cols = 0; + vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs); + } vp9_encode_fp_row_mt(cpi); first_tile_col = &cpi->tile_data[0]; -#if ENABLE_MT_BIT_MATCH - accumulate_floating_point_stats(cpi, first_tile_col); -#endif + if (cpi->oxcf.ethread_bit_match) + accumulate_floating_point_stats(cpi, first_tile_col); first_pass_stat_calc(cpi, &fps, &(first_tile_col->fp_data)); } diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index fd6fe66fb..d660aa1ff 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -41,14 +41,11 @@ typedef struct { #define INVALID_ROW -1 -#define ENABLE_MT_BIT_MATCH 0 -#if 
ENABLE_MT_BIT_MATCH typedef struct { double frame_mb_intra_factor; double frame_mb_brightness_factor; double frame_mb_neutral_count; } FP_MB_FLOAT_STATS; -#endif typedef struct { double intra_factor; @@ -153,9 +150,7 @@ typedef struct { FIRSTPASS_MB_STATS firstpass_mb_stats; #endif -#if ENABLE_MT_BIT_MATCH FP_MB_FLOAT_STATS *fp_mb_float_stats; -#endif // An indication of the content type of the current frame FRAME_CONTENT_TYPE fr_content_type;