c17b62e1bd
Change bitreading functions to use a larger window which is refilled less often.

This makes it cheap enough to do bounds checking each time the window is refilled, which avoids the need to copy the input into a large circular buffer. This uses less memory and speeds up the total decode time by 1.6% on an ARM11, 2.8% on a Cortex A8, and 2.2% on x86-32, but less than 1% on x86-64.

Inlining vp8dx_bool_decoder_fill() has a big penalty on x86-32, as does moving the refill loop to the front of vp8dx_decode_bool(). However, having the refill loop between computation of the split values and the branch in vp8_decode_mb_tokens() is a big win on ARM (presumably due to memory latency and code size: refilling after normalization duplicates the code in the DECODE_AND_BRANCH_IF_ZERO and DECODE_AND_LOOP_IF_ZERO cases).

Unfortunately, refilling at the end of vp8dx_bool_decoder_fill() and at the beginning of each decode step in vp8_decode_mb_tokens() means the latter requires an extra refill at the end. Platform-specific versions could avoid the problem, but would require most of detokenize.c to be duplicated.

Change-Id: I16c782a63376f2a15b78f8086d899b987204c1c7
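The bool-decoder functions named above (vp8dx_bool_decoder_fill(), vp8dx_decode_bool()) are defined elsewhere in the decoder (dboolhuff.c/h), not in this file. As a rough sketch of the windowed-refill idea the message describes — not the actual libvpx implementation; every name below is illustrative — a bounds-checked refill over a machine-word window might look like this:

#include <limits.h>    /* CHAR_BIT */
#include <stddef.h>    /* size_t   */

/* Illustrative reader state: a machine-word bit window refilled a whole byte
 * at a time.  Consumers shift used bits out of the top of the window and
 * decrement bit_count; only the refill ever touches the input pointer. */
typedef struct
{
    const unsigned char *input;       /* next unread byte of the bitstream   */
    const unsigned char *input_end;   /* one past the last valid input byte  */
    size_t               window;      /* bit window, MSB-aligned             */
    int                  bit_count;   /* number of valid bits in the window  */
} BOOL_READER_SKETCH;

static void bool_reader_fill_sketch(BOOL_READER_SKETCH *br)
{
    const int window_bits = (int)(sizeof(br->window) * CHAR_BIT);

    /* Refill whole bytes until the window is full.  The end-of-buffer check
     * runs once per refilled byte instead of once per decoded bit, and
     * reading past the end simply shifts in zero bits. */
    while (br->bit_count <= window_bits - 8)
    {
        unsigned char byte = 0;

        if (br->input < br->input_end)
            byte = *br->input++;

        br->window |= (size_t)byte << (window_bits - 8 - br->bit_count);
        br->bit_count += 8;
    }
}

On a 64-bit host this pulls in up to eight bytes per refill, so the bounds check is amortized over as many as 64 decoded bits — which is what makes it cheap enough to drop the circular input buffer mentioned in the message.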
/*
 *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#ifndef WIN32
# include <unistd.h>
#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "threading.h"

#include "loopfilter.h"
#include "extend.h"
#include "vpx_ports/vpx_timer.h"

extern void vp8_decode_mb_row(VP8D_COMP *pbi,
                              VP8_COMMON *pc,
                              int mb_row,
                              MACROBLOCKD *xd);

extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);

void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = &pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;
#endif

        mbd->subpixel_predict = xd->subpixel_predict;
        mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
        mbd->gf_active_ptr = xd->gf_active_ptr;

        mbd->mode_info = pc->mi - 1;
        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride = pc->mode_info_stride;

        mbd->frame_type = pc->frame_type;
        mbd->frames_since_golden = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;

        mbd->pre = pc->last_frame;
        mbd->dst = pc->new_frame;

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);
        mbd->segmentation_enabled = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        mbd->mbmi.mode = DC_PRED;
        mbd->mbmi.uv_mode = DC_PRED;

        mbd->current_bc = &pbi->bc2;

        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }

#else
    (void) pbi;
    (void) xd;
    (void) mbrd;
    (void) count;
#endif
}


THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT mb_row_left_context[4][4];

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc = &pbi->common;
                int mb_row = mbrd->mb_row;
                MACROBLOCKD *xd = &mbrd->mbd;

                //printf("ithread:%d mb_row %d\n", ithread, mb_row);
                int i;
                int recon_yoffset, recon_uvoffset;
                int mb_col;
                int recon_y_stride = pc->last_frame.y_stride;
                int recon_uv_stride = pc->last_frame.uv_stride;

                volatile int *last_row_current_mb_col;

                if (ithread > 0)
                    last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
                else
                    last_row_current_mb_col = &pbi->current_mb_col_main;

                recon_yoffset = mb_row * recon_y_stride * 16;
                recon_uvoffset = mb_row * recon_uv_stride * 8;
                // reset above block coeffs

                xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
                xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
                xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
                xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
                xd->left_context = mb_row_left_context;
                vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
                xd->up_available = (mb_row != 0);

                xd->mb_to_top_edge = -((mb_row * 16)) << 3;
                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

                for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                {
                    // Wait until the thread decoding the row above is at least
                    // one MB ahead, so the context this MB depends on is ready.
                    while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                        x86_pause_hint();
                        thread_sleep(0);
                    }

                    // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
                    // the partition_bmi array is unused in the decoder, so don't copy it.
                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
                               sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));

                    if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
                    {
                        for (i = 0; i < 16; i++)
                        {
                            BLOCKD *d = &xd->block[i];
                            vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                        }
                    }

                    // Distance of Mb to the various image edges.
                    // These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                    xd->mb_to_left_edge = -((mb_col * 16) << 3);
                    xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                    xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
                    xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
                    xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;

                    xd->left_available = (mb_col != 0);

                    // Select the appropriate reference frame for this MB
                    if (xd->mbmi.ref_frame == LAST_FRAME)
                    {
                        xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
                    }
                    else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
                    {
                        // Golden frame reconstruction buffer
                        xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
                    }
                    else
                    {
                        // Alternate reference frame reconstruction buffer
                        xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
                    }

                    vp8_build_uvmvs(xd, pc->full_pixel);

                    vp8_decode_macroblock(pbi, xd);

                    recon_yoffset += 16;
                    recon_uvoffset += 8;

                    ++xd->mode_info_context;  /* next mb */

                    xd->gf_active_ptr++;      // GF usage flag for next MB

                    xd->above_context[Y1CONTEXT] += 4;
                    xd->above_context[UCONTEXT ] += 2;
                    xd->above_context[VCONTEXT ] += 2;
                    xd->above_context[Y2CONTEXT] ++;

                    // Publish progress so the thread decoding the row below can proceed.
                    pbi->mb_row_di[ithread].current_mb_col = mb_col;
                }

                // adjust to the next row of mbs
                vp8_extend_mb_row(
                    &pc->new_frame,
                    xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
                );

                ++xd->mode_info_context;      /* skip prediction column */

                // skip the rows being handled by the other decoding threads
                xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;

                //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
                if ((mb_row & 1) == 1)
                {
                    pbi->last_mb_row_decoded = mb_row;
                    //printf("S%d", pbi->last_mb_row_decoded);
                }

                if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
                {
                    //SetEvent(pbi->h_event_main);
                    sem_post(&pbi->h_event_main);
                }
            }
        }
    }
#else
    (void) p_data;
#endif

    return 0;
}

THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
{
#if CONFIG_MULTITHREAD
    VP8D_COMP *pbi = (VP8D_COMP *)p_data;

    while (1)
    {
        if (pbi->b_multithreaded_lf == 0)
            break;

        //printf("before waiting for start_lpf\n");

        //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_start_lpf) == 0)
        {
            if (pbi->b_multithreaded_lf == 0) // we're shutting down
                break;
            else
            {
                VP8_COMMON *cm = &pbi->common;
                MACROBLOCKD *mbd = &pbi->lpfmb;
                int default_filt_lvl = pbi->common.filter_level;

                YV12_BUFFER_CONFIG *post = &cm->new_frame;
                loop_filter_info *lfi = cm->lf_info;

                int mb_row;
                int mb_col;

                int baseline_filter_level[MAX_MB_SEGMENTS];
                int filter_level;
                int alt_flt_enabled = mbd->segmentation_enabled;

                int i;
                unsigned char *y_ptr, *u_ptr, *v_ptr;

                volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;

                //MODE_INFO * this_mb_mode_info = cm->mi;
                mbd->mode_info_context = cm->mi;   // Point at base of Mb MODE_INFO list

                // Note the baseline filter values for each segment
                if (alt_flt_enabled)
                {
                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
                    {
                        if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                            baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                        else
                        {
                            baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                            baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
                        }
                    }
                }
                else
                {
                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
                        baseline_filter_level[i] = default_filt_lvl;
                }

                // Initialize the loop filter for this frame.
                vp8_init_loop_filter(cm);

                // Set up the buffer pointers
                y_ptr = post->y_buffer;
                u_ptr = post->u_buffer;
                v_ptr = post->v_buffer;

                // vp8_filter each macro block
                for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
                {
                    // Wait until the decoding threads have progressed past this row.
                    while (mb_row >= *last_mb_row_decoded)
                    {
                        x86_pause_hint();
                        thread_sleep(0);
                    }

                    //printf("R%d", mb_row);
                    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
                    {
                        int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;

                        filter_level = baseline_filter_level[Segment];

                        // Apply any context driven MB level adjustment
                        vp8_adjust_mb_lf_value(mbd, &filter_level);

                        if (filter_level)
                        {
                            if (mb_col > 0)
                                cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
                                cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            // don't apply across umv border
                            if (mb_row > 0)
                                cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
                                cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
                        }

                        y_ptr += 16;
                        u_ptr += 8;
                        v_ptr += 8;

                        mbd->mode_info_context++;  // step to next MB
                    }

                    y_ptr += post->y_stride  * 16 - post->y_width;
                    u_ptr += post->uv_stride *  8 - post->uv_width;
                    v_ptr += post->uv_stride *  8 - post->uv_width;

                    mbd->mode_info_context++;      // Skip border mb
                }

                //printf("R%d\n", mb_row);
                // When done, signal the main thread that loop filtering is finished
                //SetEvent(pbi->h_event_lpf);
                sem_post(&pbi->h_event_lpf);
            }
        }
    }
#else
    (void) p_data;
#endif
    return 0;
}

void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->b_multithreaded_lf = 0;
    pbi->allocated_decoding_thread_count = 0;
    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;    //vp8_get_proc_core_count();

    if (core_count > 1)
    {
        sem_init(&pbi->h_event_lpf, 0, 0);
        sem_init(&pbi->h_event_start_lpf, 0, 0);
        pbi->b_multithreaded_lf = 1;
        pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
    }

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_main, 0, 0);
        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }

#else
    (void) pbi;
#endif
}

void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD

    if (pbi->b_multithreaded_lf)
    {
        pbi->b_multithreaded_lf = 0;
        sem_post(&pbi->h_event_start_lpf);
        pthread_join(pbi->h_thread_lpf, 0);
        sem_destroy(&pbi->h_event_start_lpf);
    }

    //shutdown MB Decoding thread;
    if (pbi->b_multithreaded_rd)
    {
        pbi->b_multithreaded_rd = 0;

        // allow all threads to exit
        {
            int i;

            for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
            {
                sem_post(&pbi->h_event_mbrdecoding[i]);
                pthread_join(pbi->h_decoding_thread[i], NULL);
            }
        }

        {
            int i;

            for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
            {
                sem_destroy(&pbi->h_event_mbrdecoding[i]);
            }
        }

        sem_destroy(&pbi->h_event_main);

        if (pbi->h_decoding_thread)
        {
            vpx_free(pbi->h_decoding_thread);
            pbi->h_decoding_thread = NULL;
        }

        if (pbi->h_event_mbrdecoding)
        {
            vpx_free(pbi->h_event_mbrdecoding);
            pbi->h_event_mbrdecoding = NULL;
        }

        if (pbi->mb_row_di)
        {
            vpx_free(pbi->mb_row_di);
            pbi->mb_row_di = NULL;
        }

        if (pbi->de_thread_data)
        {
            vpx_free(pbi->de_thread_data);
            pbi->de_thread_data = NULL;
        }
    }

#else
    (void) pbi;
#endif
}


void vp8_start_lfthread(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
    pbi->last_mb_row_decoded = 0;
    sem_post(&pbi->h_event_start_lpf);
#else
    (void) pbi;
#endif
}

void vp8_stop_lfthread(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    struct vpx_usec_timer timer;

    vpx_usec_timer_start(&timer);

    sem_wait(&pbi->h_event_lpf);

    vpx_usec_timer_mark(&timer);
    pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
#else
    (void) pbi;
#endif
}


void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                          MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int ibc = 0;
    int num_part = 1 << pbi->common.multi_token_partition;

    vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    // The main thread decodes mb_row; each worker thread is handed one of the
    // next decoding_thread_count rows, so rows are processed in interleaved groups.
    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        int i;
        pbi->current_mb_col_main = -1;

        xd->current_bc = &pbi->mbc[ibc];
        ibc++;

        if (ibc == num_part)
            ibc = 0;

        for (i = 0; i < pbi->decoding_thread_count; i++)
        {
            if ((mb_row + i + 1) >= pc->mb_rows)
                break;

            pbi->mb_row_di[i].mb_row = mb_row + i + 1;
            pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
            ibc++;

            if (ibc == num_part)
                ibc = 0;

            pbi->mb_row_di[i].current_mb_col = -1;
            sem_post(&pbi->h_event_mbrdecoding[i]);
        }

        vp8_decode_mb_row(pbi, pc, mb_row, xd);

        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;

        if (mb_row < pc->mb_rows - 1)
        {
            sem_wait(&pbi->h_event_main);
        }
    }

    pbi->last_mb_row_decoded = mb_row;
#else
    (void) pbi;
    (void) xd;
#endif
}