vp9_ethread: create enough threads while using SVC

This patch modified the thread creating code. When use_svc is true,
the number of threads created is decided by the highest resolution.
This resolved WebM issue 1018.

Change-Id: I367227b14d1f8b08bbdad3635b232a3a37bbba26
This commit is contained in:
Yunqing Wang 2015-06-12 17:10:30 -07:00
parent a4bb5f2a29
commit c98273c9e7
2 changed files with 30 additions and 7 deletions

View File

@ -630,7 +630,8 @@ int main(int argc, const char **argv) {
if (svc_ctx.speed != -1)
vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, 0);
if (svc_ctx.threads)
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
// Encode frames
while (!end_of_stream) {

View File

@ -54,6 +54,18 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
return 0;
}
static int get_max_tile_cols(VP9_COMP *cpi) {
const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
int mi_cols = aligned_width >> MI_SIZE_LOG2;
int min_log2_tile_cols, max_log2_tile_cols;
int log2_tile_cols;
vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols);
return (1 << log2_tile_cols);
}
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@ -65,20 +77,30 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
// Only run once to create threads and allocate thread data.
if (cpi->num_workers == 0) {
int allocated_workers = num_workers;
// While using SVC, we need to allocate threads according to the highest
// resolution.
if (cpi->use_svc) {
int max_tile_cols = get_max_tile_cols(cpi);
allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols);
}
CHECK_MEM_ERROR(cm, cpi->workers,
vpx_malloc(num_workers * sizeof(*cpi->workers)));
vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
vpx_calloc(allocated_workers,
sizeof(*cpi->tile_thr_data)));
for (i = 0; i < num_workers; i++) {
for (i = 0; i < allocated_workers; i++) {
VP9Worker *const worker = &cpi->workers[i];
EncWorkerData *thread_data = &cpi->tile_thr_data[i];
++cpi->num_workers;
winterface->init(worker);
if (i < num_workers - 1) {
if (i < allocated_workers - 1) {
thread_data->cpi = cpi;
// Allocate thread data.
@ -154,7 +176,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
// Set the starting tile for each thread.
thread_data->start = i;
if (i == num_workers - 1)
if (i == cpi->num_workers - 1)
winterface->execute(worker);
else
winterface->launch(worker);
@ -171,7 +193,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
// Accumulate counters.
if (i < num_workers - 1) {
if (i < cpi->num_workers - 1) {
vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}