Encoder loopfilter running in its own thread

In multithreaded mode the loopfilter (filter level calculation and frame
filtering) runs in its own thread. Filtering is mostly done in parallel with
the bitstream packing; only the loopfilter level has to be calculated before
the packing can start. Any needed reference frame copying is also done in the
filter thread.

Currently the encoder will create n+1 threads, where n > 1 is the number of
threads specified by the application and 1 is the extra filter thread. With
n = 1 the encoder runs in single-thread mode. There will never be more than n
threads running concurrently.

Change-Id: I4fb29b559a40275d6d3babb8727245c40fba931b
This commit is contained in:
Attila Nagy 2011-02-25 13:42:05 +02:00
parent cfaee9f7c6
commit 3ae2465788
3 changed files with 155 additions and 71 deletions

View File

@ -24,6 +24,35 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
extern void vp8_build_block_offsets(MACROBLOCK *x);
extern void vp8_setup_block_ptrs(MACROBLOCK *x);
#if CONFIG_MULTITHREAD
extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
static THREAD_FUNCTION loopfilter_thread(void *p_data)
{
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
VP8_COMMON *cm = &cpi->common;
while (1)
{
if (cpi->b_multi_threaded == 0)
break;
if (sem_wait(&cpi->h_event_start_lpf) == 0)
{
if (cpi->b_multi_threaded == FALSE) // we're shutting down
break;
loopfilter_frame(cpi, cm);
sem_post(&cpi->h_event_end_lpf);
}
}
return 0;
}
#endif
static
THREAD_FUNCTION thread_encoding_proc(void *p_data)
{
@ -479,6 +508,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
}
{
LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;
sem_init(&cpi->h_event_start_lpf, 0, 0);
sem_init(&cpi->h_event_end_lpf, 0, 0);
lpfthd->ptr1 = (void *)cpi;
pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
}
}
}
@ -500,9 +538,14 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
sem_destroy(&cpi->h_event_start_encoding[i]);
}
sem_post(&cpi->h_event_start_lpf);
pthread_join(cpi->h_filter_thread, 0);
}
sem_destroy(&cpi->h_event_end_encoding);
sem_destroy(&cpi->h_event_end_lpf);
sem_destroy(&cpi->h_event_start_lpf);
//free thread related resources
vpx_free(cpi->h_event_start_encoding);

View File

@ -3509,6 +3509,89 @@ static BOOL recode_loop_test( VP8_COMP *cpi,
return force_recode;
}
/* Loop filter the frame that has just been encoded.
 *
 * Steps, in order:
 *   1. Pick cm->filter_level (forced to 0 when cm->no_lpf is set) and add
 *      the time spent picking it to cpi->time_pick_lpf.
 *   2. In multithreaded mode, post h_event_end_lpf so that the thread
 *      waiting for the filter level can carry on.
 *   3. If the level is non-zero, run the loop filter over the frame and
 *      remember the filter type / sharpness that were used.
 *   4. Extend the borders of cm->frame_to_show.
 *   5. Perform the golden / alt-ref reference buffer copies signalled for
 *      this frame.
 *
 * cpi : encoder instance
 * cm  : common (encoder/decoder shared) state
 */
void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
{
    if (cm->no_lpf)
    {
        cm->filter_level = 0;
    }
    else
    {
        struct vpx_usec_timer timer;

        vp8_clear_system_state();

        vpx_usec_timer_start(&timer);

        if (cpi->sf.auto_filter == 0)
            vp8cx_pick_filter_level_fast(cpi->Source, cpi);
        else
            vp8cx_pick_filter_level(cpi->Source, cpi);

        vpx_usec_timer_mark(&timer);
        cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
    }

#if CONFIG_MULTITHREAD
    /* Signal that filter_level has been set. Only do so when the filter
     * really runs in its own thread: in single thread mode nobody waits on
     * this semaphore, so posting it would leave its count growing with
     * every frame (and the semaphore may not have been initialised at all).
     */
    if (cpi->b_multi_threaded)
        sem_post(&cpi->h_event_end_lpf);
#endif

    if (cm->filter_level > 0)
    {
        vp8cx_set_alt_lf_level(cpi, cm->filter_level);
        vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
        cm->last_filter_type = cm->filter_type;
        cm->last_sharpness_level = cm->sharpness_level;
    }

    vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);

    {
        YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
        YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
        YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
        YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];

        // At this point the new frame has been encoded.
        // If any buffer copy / swapping is signaled it should be done here.
        if (cm->frame_type == KEY_FRAME)
        {
            // A key frame is copied to both the golden and the alt-ref buffer.
            vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
            vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
        }
        else // For non key frames
        {
            // Code to copy between reference buffers
            if (cm->copy_buffer_to_arf)
            {
                if (cm->copy_buffer_to_arf == 1)
                {
                    if (cm->refresh_last_frame)
                        // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
                        vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
                    else
                        vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
                }
                else if (cm->copy_buffer_to_arf == 2)
                    vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
            }

            if (cm->copy_buffer_to_gf)
            {
                if (cm->copy_buffer_to_gf == 1)
                {
                    if (cm->refresh_last_frame)
                        // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
                        vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
                    else
                        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
                }
                else if (cm->copy_buffer_to_gf == 2)
                    vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
            }
        }
    }
}
static void encode_frame_to_data_rate
(
VP8_COMP *cpi,
@ -4058,8 +4141,8 @@ static void encode_frame_to_data_rate
vp8_setup_key_frame(cpi);
// transform / motion compensation build reconstruction frame
vp8_encode_frame(cpi);
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
@ -4408,92 +4491,43 @@ static void encode_frame_to_data_rate
else
cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
if (cm->no_lpf)
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded)
{
cm->filter_level = 0;
sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */
}
else
#endif
{
struct vpx_usec_timer timer;
vpx_usec_timer_start(&timer);
if (cpi->sf.auto_filter == 0)
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
else
vp8cx_pick_filter_level(cpi->Source, cpi);
vpx_usec_timer_mark(&timer);
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
loopfilter_frame(cpi, cm);
}
if (cm->filter_level > 0)
{
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
}
/* Move storing frame_type out of the above loop since it is also
* needed in motion search besides loopfilter */
cm->last_frame_type = cm->frame_type;
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
if (cpi->oxcf.error_resilient_mode == 1)
{
cm->refresh_entropy_probs = 0;
}
#if CONFIG_MULTITHREAD
/* wait that filter_level is picked so that we can continue with stream packing */
if (cpi->b_multi_threaded)
sem_wait(&cpi->h_event_end_lpf);
#endif
// build the bitstream
vp8_pack_bitstream(cpi, dest, size);
#if CONFIG_MULTITHREAD
/* wait for loopfilter thread done */
if (cpi->b_multi_threaded)
{
YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
// At this point the new frame has been encoded coded.
// If any buffer copy / swaping is signalled it should be done here.
if (cm->frame_type == KEY_FRAME)
{
vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
}
else // For non key frames
{
// Code to copy between reference buffers
if (cm->copy_buffer_to_arf)
{
if (cm->copy_buffer_to_arf == 1)
{
if (cm->refresh_last_frame)
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
else
vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
}
else if (cm->copy_buffer_to_arf == 2)
vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
}
if (cm->copy_buffer_to_gf)
{
if (cm->copy_buffer_to_gf == 1)
{
if (cm->refresh_last_frame)
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
else
vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
}
else if (cm->copy_buffer_to_gf == 2)
vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
}
}
sem_wait(&cpi->h_event_end_lpf);
}
#endif
/* Move storing frame_type out of the above loop since it is also
* needed in motion search besides loopfilter */
cm->last_frame_type = cm->frame_type;
// Update rate control heuristics
cpi->total_byte_count += (*size);
@ -5325,7 +5359,9 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame)
{
generate_psnr_packet(cpi);
}
#if CONFIG_PSNR

View File

@ -603,12 +603,17 @@ typedef struct
int encoding_thread_count;
pthread_t *h_encoding_thread;
pthread_t h_filter_thread;
MB_ROW_COMP *mb_row_ei;
ENCODETHREAD_DATA *en_thread_data;
LPFTHREAD_DATA lpf_thread_data;
//events
sem_t *h_event_start_encoding;
sem_t h_event_end_encoding;
sem_t h_event_start_lpf;
sem_t h_event_end_lpf;
#endif
TOKENLIST *tplist;