Row-based multi-threading of the ARNR filtering stage
Change-Id: Ic238d32c7e10b730342224ab56712a89a6026a8f
This commit is contained in:
parent
726556dde9
commit
91f01a2060
@ -1571,6 +1571,11 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
|
||||
cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
|
||||
cpi->oxcf.new_mt && !cpi->use_svc)
|
||||
cpi->new_mt = 1;
|
||||
|
||||
if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
|
||||
(cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt &&
|
||||
!cpi->use_svc)
|
||||
cpi->new_mt = 1;
|
||||
}
|
||||
|
||||
#ifndef M_LOG2_E
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "vp9/encoder/vp9_ethread.h"
|
||||
#include "vp9/encoder/vp9_firstpass.h"
|
||||
#include "vp9/encoder/vp9_multi_thread.h"
|
||||
#include "vp9/encoder/vp9_temporal_filter.h"
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
|
||||
static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
|
||||
@ -464,3 +465,80 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
|
||||
accumulate_fp_tile_stat(first_tile_col, this_tile);
|
||||
}
|
||||
}
|
||||
|
||||
static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
|
||||
MultiThreadHandle *multi_thread_ctxt) {
|
||||
VP9_COMP *const cpi = thread_data->cpi;
|
||||
const VP9_COMMON *const cm = &cpi->common;
|
||||
const int tile_cols = 1 << cm->log2_tile_cols;
|
||||
int tile_row, tile_col;
|
||||
int mb_col_start, mb_col_end;
|
||||
TileDataEnc *this_tile;
|
||||
int end_of_frame;
|
||||
int thread_id = thread_data->thread_id;
|
||||
int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
|
||||
JobNode *proc_job = NULL;
|
||||
int mb_row;
|
||||
|
||||
end_of_frame = 0;
|
||||
while (0 == end_of_frame) {
|
||||
// Get the next job in the queue
|
||||
proc_job =
|
||||
(JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
|
||||
if (NULL == proc_job) {
|
||||
// Query for the status of other tiles
|
||||
end_of_frame = vp9_get_tiles_proc_status(
|
||||
multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
|
||||
tile_cols);
|
||||
} else {
|
||||
tile_col = proc_job->tile_col_id;
|
||||
tile_row = proc_job->tile_row_id;
|
||||
this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
|
||||
mb_col_start = (this_tile->tile_info.mi_col_start) >> 1;
|
||||
mb_col_end = (this_tile->tile_info.mi_col_end + 1) >> 1;
|
||||
mb_row = proc_job->vert_unit_row_num;
|
||||
|
||||
vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row,
|
||||
mb_col_start, mb_col_end);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Runs the temporal (ARNR) filter using row-based multi-threading.
//
// Steps, in order:
//   1. (Re)initialize tile data, reallocating the row-MT memory first if the
//      current allocation is too small for the tile layout or mb row count.
//   2. Create the encoder workers and assign tiles to threads.
//   3. Prepare the job queue for ARNR_JOB.
//   4. Copy the main thread's macroblock state into each worker's ThreadData.
//   5. Launch the workers with temporal_filter_worker_hook.
void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *const mt_ctxt = &cpi->multi_thread_ctxt;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  // Fall back to a single worker when none has been configured yet.
  const int num_workers = cpi->num_workers ? cpi->num_workers : 1;
  // The existing row-MT allocation is insufficient if any dimension grew.
  const int needs_realloc = mt_ctxt->allocated_tile_cols < tile_cols ||
                            mt_ctxt->allocated_tile_rows < tile_rows ||
                            mt_ctxt->allocated_vert_unit_rows < cm->mb_rows;
  int worker_idx = 0;

  // Tile data is always initialized; the row-MT memory is released before and
  // re-acquired after that initialization only when it is too small.
  if (needs_realloc) vp9_row_mt_mem_dealloc(cpi);
  vp9_init_tile_data(cpi);
  if (needs_realloc) vp9_row_mt_mem_alloc(cpi);

  create_enc_workers(cpi, num_workers);

  // Note: the worker count passed here is read from cpi after the workers
  // have been created, not taken from the local num_workers.
  vp9_assign_tile_to_thread(mt_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, ARNR_JOB);

  // Before filtering, give every worker that owns a separate ThreadData a copy
  // of the macroblock state held in cpi->td.
  while (worker_idx < num_workers) {
    EncWorkerData *const worker_data = &cpi->tile_thr_data[worker_idx];

    if (worker_data->td != &cpi->td) {
      worker_data->td->mb = cpi->td.mb;
    }
    ++worker_idx;
  }

  launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook, mt_ctxt,
                     num_workers);
}
|
||||
|
@ -61,6 +61,8 @@ void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, struct VP9Common *cm,
|
||||
// Deallocate the mutex and data used for row-based multi-threading
// synchronization.
|
||||
void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync);
|
||||
|
||||
void vp9_temporal_filter_row_mt(struct VP9_COMP *cpi);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -2381,9 +2381,6 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
|
||||
cost_list[4] = INT_MAX;
|
||||
}
|
||||
|
||||
// Keep track of number of searches (this frame in this thread).
|
||||
++(*x->m_search_count_ptr);
|
||||
|
||||
switch (method) {
|
||||
case FAST_DIAMOND:
|
||||
var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
|
||||
@ -2410,6 +2407,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
|
||||
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
|
||||
cost_list, fn_ptr, ref_mv, tmp_mv);
|
||||
|
||||
// Keep track of number of searches (this frame in this thread).
|
||||
++(*x->m_search_count_ptr);
|
||||
|
||||
// Should we allow a follow on exhaustive search?
|
||||
if (is_exhaustive_allowed(cpi, x)) {
|
||||
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "vp9/common/vp9_quant_common.h"
|
||||
#include "vp9/common/vp9_reconinter.h"
|
||||
#include "vp9/encoder/vp9_encodeframe.h"
|
||||
#include "vp9/encoder/vp9_ethread.h"
|
||||
#include "vp9/encoder/vp9_extend.h"
|
||||
#include "vp9/encoder/vp9_firstpass.h"
|
||||
#include "vp9/encoder/vp9_mcomp.h"
|
||||
@ -262,9 +263,9 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
|
||||
return bestsme;
|
||||
}
|
||||
|
||||
static void temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
|
||||
int mb_row, int mb_col_start,
|
||||
int mb_col_end) {
|
||||
void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
|
||||
int mb_row, int mb_col_start,
|
||||
int mb_col_end) {
|
||||
ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data;
|
||||
YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames;
|
||||
int frame_count = arnr_filter_data->frame_count;
|
||||
@ -571,8 +572,8 @@ static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row,
|
||||
int mb_row;
|
||||
|
||||
for (mb_row = mb_row_start; mb_row < mb_row_end; mb_row++) {
|
||||
temporal_filter_iterate_row_c(cpi, &cpi->td, mb_row, mb_col_start,
|
||||
mb_col_end);
|
||||
vp9_temporal_filter_iterate_row_c(cpi, &cpi->td, mb_row, mb_col_start,
|
||||
mb_col_end);
|
||||
}
|
||||
}
|
||||
|
||||
@ -765,5 +766,8 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
|
||||
set_error_per_bit(&cpi->td.mb, rdmult);
|
||||
vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);
|
||||
|
||||
temporal_filter_iterate_c(cpi);
|
||||
if (!cpi->new_mt)
|
||||
temporal_filter_iterate_c(cpi);
|
||||
else
|
||||
vp9_temporal_filter_row_mt(cpi);
|
||||
}
|
||||
|
@ -20,6 +20,10 @@ extern "C" {
|
||||
void vp9_temporal_filter_init(void);
|
||||
void vp9_temporal_filter(VP9_COMP *cpi, int distance);
|
||||
|
||||
void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
|
||||
int mb_row, int mb_col_start,
|
||||
int mb_col_end);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user