diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 972509d7d..f4e20e1af 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -54,7 +54,7 @@ typedef struct frame_contexts { int initialized; } FRAME_CONTEXT; -typedef struct { +typedef struct FRAME_COUNTS { unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index 98799a3d6..1a93a34cc 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c @@ -10,6 +10,7 @@ #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_reconinter.h" @@ -299,3 +300,119 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { vp9_zero(*lf_sync); } } + +// Accumulate frame counts: adds |counts| into cm->counts (coef only if is_dec). +void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts, + int is_dec) { + int i, j, k, l, m; + + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) + for (j = 0; j < INTRA_MODES; j++) + cm->counts.y_mode[i][j] += counts->y_mode[i][j]; + + for (i = 0; i < INTRA_MODES; i++) + for (j = 0; j < INTRA_MODES; j++) + cm->counts.uv_mode[i][j] += counts->uv_mode[i][j]; + + for (i = 0; i < PARTITION_CONTEXTS; i++) + for (j = 0; j < PARTITION_TYPES; j++) + cm->counts.partition[i][j] += counts->partition[i][j]; + + if (is_dec) { + int n; + for (i = 0; i < TX_SIZES; i++) + for (j = 0; j < PLANE_TYPES; j++) + for (k = 0; k < REF_TYPES; k++) + for (l = 0; l < COEF_BANDS; l++) + for (m = 0; m < COEFF_CONTEXTS; m++) { + cm->counts.eob_branch[i][j][k][l][m] += + counts->eob_branch[i][j][k][l][m]; + for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) + cm->counts.coef[i][j][k][l][m][n] += + counts->coef[i][j][k][l][m][n]; + } + } else { + for (i = 0; i < TX_SIZES; i++) + for (j = 0; j < PLANE_TYPES; j++) + for (k = 0; k < 
REF_TYPES; k++) + for (l = 0; l < COEF_BANDS; l++) + for (m = 0; m < COEFF_CONTEXTS; m++) + cm->counts.eob_branch[i][j][k][l][m] += + counts->eob_branch[i][j][k][l][m]; + // In the encoder, cm->counts.coef is only updated at frame + // level, so there is no need to accumulate it here. + // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) + // cm->counts.coef[i][j][k][l][m][n] += + // counts->coef[i][j][k][l][m][n]; + } + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) + for (j = 0; j < SWITCHABLE_FILTERS; j++) + cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j]; + + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + for (j = 0; j < INTER_MODES; j++) + cm->counts.inter_mode[i][j] += counts->inter_mode[i][j]; + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + for (j = 0; j < 2; j++) + cm->counts.intra_inter[i][j] += counts->intra_inter[i][j]; + + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + for (j = 0; j < 2; j++) + cm->counts.comp_inter[i][j] += counts->comp_inter[i][j]; + + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k]; + + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + cm->counts.comp_ref[i][j] += counts->comp_ref[i][j]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + for (j = 0; j < TX_SIZES; j++) + cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j]; + + for (j = 0; j < TX_SIZES - 1; j++) + cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j]; + + for (j = 0; j < TX_SIZES - 2; j++) + cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j]; + } + + for (i = 0; i < SKIP_CONTEXTS; i++) + for (j = 0; j < 2; j++) + cm->counts.skip[i][j] += counts->skip[i][j]; + + for (i = 0; i < MV_JOINTS; i++) + cm->counts.mv.joints[i] += counts->mv.joints[i]; + + for (k = 0; k < 2; k++) { + nmv_component_counts *comps = &cm->counts.mv.comps[k]; + nmv_component_counts *comps_t = &counts->mv.comps[k]; + + for (i = 0; i < 2; i++) { + comps->sign[i] += 
comps_t->sign[i]; + comps->class0_hp[i] += comps_t->class0_hp[i]; + comps->hp[i] += comps_t->hp[i]; + } + + for (i = 0; i < MV_CLASSES; i++) + comps->classes[i] += comps_t->classes[i]; + + for (i = 0; i < CLASS0_SIZE; i++) { + comps->class0[i] += comps_t->class0[i]; + for (j = 0; j < MV_FP_SIZE; j++) + comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; + } + + for (i = 0; i < MV_OFFSET_BITS; i++) + for (j = 0; j < 2; j++) + comps->bits[i][j] += comps_t->bits[i][j]; + + for (i = 0; i < MV_FP_SIZE; i++) + comps->fp[i] += comps_t->fp[i]; + } +} diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h index bca357e52..3b3a6996a 100644 --- a/vp9/common/vp9_thread_common.h +++ b/vp9/common/vp9_thread_common.h @@ -15,6 +15,7 @@ #include "vp9/common/vp9_thread.h" struct VP9Common; +struct FRAME_COUNTS; // Loopfilter row synchronization typedef struct VP9LfSyncData { @@ -50,4 +51,7 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9Worker *workers, int num_workers, VP9LfSync *lf_sync); +void vp9_accumulate_frame_counts(struct VP9Common *cm, + struct FRAME_COUNTS *counts, int is_dec); + #endif // VP9_COMMON_VP9_LOOPFILTER_THREAD_H_ diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 87aa161fe..1d0d8f6f9 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1085,105 +1085,6 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -// Accumulate frame counts. 
-static void accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts) { - int i, j, k, l, m, n; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - for (j = 0; j < INTRA_MODES; j++) - cm->counts.y_mode[i][j] += counts->y_mode[i][j]; - - for (i = 0; i < INTRA_MODES; i++) - for (j = 0; j < INTRA_MODES; j++) - cm->counts.uv_mode[i][j] += counts->uv_mode[i][j]; - - for (i = 0; i < PARTITION_CONTEXTS; i++) - for (j = 0; j < PARTITION_TYPES; j++) - cm->counts.partition[i][j] += counts->partition[i][j]; - - for (i = 0; i < TX_SIZES; i++) - for (j = 0; j < PLANE_TYPES; j++) - for (k = 0; k < REF_TYPES; k++) - for (l = 0; l < COEF_BANDS; l++) - for (m = 0; m < COEFF_CONTEXTS; m++) { - cm->counts.eob_branch[i][j][k][l][m] += - counts->eob_branch[i][j][k][l][m]; - for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - cm->counts.coef[i][j][k][l][m][n] += - counts->coef[i][j][k][l][m][n]; - } - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - for (j = 0; j < SWITCHABLE_FILTERS; j++) - cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j]; - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) - for (j = 0; j < INTER_MODES; j++) - cm->counts.inter_mode[i][j] += counts->inter_mode[i][j]; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.intra_inter[i][j] += counts->intra_inter[i][j]; - - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.comp_inter[i][j] += counts->comp_inter[i][j]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) - cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.comp_ref[i][j] += counts->comp_ref[i][j]; - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) - cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j]; - - for (j = 0; j < TX_SIZES - 1; j++) - cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j]; - - for (j = 0; j < TX_SIZES - 
2; j++) - cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j]; - } - - for (i = 0; i < SKIP_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.skip[i][j] += counts->skip[i][j]; - - for (i = 0; i < MV_JOINTS; i++) - cm->counts.mv.joints[i] += counts->mv.joints[i]; - - for (k = 0; k < 2; k++) { - nmv_component_counts *comps = &cm->counts.mv.comps[k]; - nmv_component_counts *comps_t = &counts->mv.comps[k]; - - for (i = 0; i < 2; i++) { - comps->sign[i] += comps_t->sign[i]; - comps->class0_hp[i] += comps_t->class0_hp[i]; - comps->hp[i] += comps_t->hp[i]; - } - - for (i = 0; i < MV_CLASSES; i++) - comps->classes[i] += comps_t->classes[i]; - - for (i = 0; i < CLASS0_SIZE; i++) { - comps->class0[i] += comps_t->class0[i]; - for (j = 0; j < MV_FP_SIZE; j++) - comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; - } - - for (i = 0; i < MV_OFFSET_BITS; i++) - for (j = 0; j < 2; j++) - comps->bits[i][j] += comps_t->bits[i][j]; - - for (i = 0; i < MV_FP_SIZE; i++) - comps->fp[i] += comps_t->fp[i]; - } -} - static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { @@ -1333,7 +1234,7 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, for (i = 0; i < num_workers; ++i) { TileWorkerData *const tile_data = (TileWorkerData*)pbi->tile_workers[i].data1; - accumulate_frame_counts(cm, &tile_data->counts); + vp9_accumulate_frame_counts(cm, &tile_data->counts, 1); } } } diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index 12fb4d107..9916cc06c 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -12,105 +12,6 @@ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" -static void accumulate_frame_counts(VP9_COMMON *cm, ThreadData *td) { - int i, j, k, l, m; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - for (j = 0; j < INTRA_MODES; j++) - cm->counts.y_mode[i][j] += td->counts->y_mode[i][j]; - - for (i = 0; i < INTRA_MODES; i++) - for (j = 0; j < INTRA_MODES; j++) - 
cm->counts.uv_mode[i][j] += td->counts->uv_mode[i][j]; - - for (i = 0; i < PARTITION_CONTEXTS; i++) - for (j = 0; j < PARTITION_TYPES; j++) - cm->counts.partition[i][j] += td->counts->partition[i][j]; - - for (i = 0; i < TX_SIZES; i++) - for (j = 0; j < PLANE_TYPES; j++) - for (k = 0; k < REF_TYPES; k++) - for (l = 0; l < COEF_BANDS; l++) - for (m = 0; m < COEFF_CONTEXTS; m++) - cm->counts.eob_branch[i][j][k][l][m] += - td->counts->eob_branch[i][j][k][l][m]; - // cm->counts.coef is only updated at frame level, so not need - // to accumulate it here. - // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - // cm->counts.coef[i][j][k][l][m][n] += - // td->counts->coef[i][j][k][l][m][n]; - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - for (j = 0; j < SWITCHABLE_FILTERS; j++) - cm->counts.switchable_interp[i][j] += td->counts->switchable_interp[i][j]; - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) - for (j = 0; j < INTER_MODES; j++) - cm->counts.inter_mode[i][j] += td->counts->inter_mode[i][j]; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.intra_inter[i][j] += td->counts->intra_inter[i][j]; - - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.comp_inter[i][j] += td->counts->comp_inter[i][j]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) - cm->counts.single_ref[i][j][k] += td->counts->single_ref[i][j][k]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.comp_ref[i][j] += td->counts->comp_ref[i][j]; - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) - cm->counts.tx.p32x32[i][j] += td->counts->tx.p32x32[i][j]; - - for (j = 0; j < TX_SIZES - 1; j++) - cm->counts.tx.p16x16[i][j] += td->counts->tx.p16x16[i][j]; - - for (j = 0; j < TX_SIZES - 2; j++) - cm->counts.tx.p8x8[i][j] += td->counts->tx.p8x8[i][j]; - } - - for (i = 0; i < SKIP_CONTEXTS; i++) - for (j = 0; j < 2; j++) - cm->counts.skip[i][j] += 
td->counts->skip[i][j]; - - for (i = 0; i < MV_JOINTS; i++) - cm->counts.mv.joints[i] += td->counts->mv.joints[i]; - - for (k = 0; k < 2; k++) { - nmv_component_counts *comps = &cm->counts.mv.comps[k]; - nmv_component_counts *comps_t = &td->counts->mv.comps[k]; - - for (i = 0; i < 2; i++) { - comps->sign[i] += comps_t->sign[i]; - comps->class0_hp[i] += comps_t->class0_hp[i]; - comps->hp[i] += comps_t->hp[i]; - } - - for (i = 0; i < MV_CLASSES; i++) - comps->classes[i] += comps_t->classes[i]; - - for (i = 0; i < CLASS0_SIZE; i++) { - comps->class0[i] += comps_t->class0[i]; - for (j = 0; j < MV_FP_SIZE; j++) - comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; - } - - for (i = 0; i < MV_OFFSET_BITS; i++) - for (j = 0; j < 2; j++) - comps->bits[i][j] += comps_t->bits[i][j]; - - for (i = 0; i < MV_FP_SIZE; i++) - comps->fp[i] += comps_t->fp[i]; - } -} - static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { int i, j, k, l, m, n; @@ -267,7 +168,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { // Accumulate counters. if (i < num_workers - 1) { - accumulate_frame_counts(&cpi->common, thread_data->td); + vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } }