Merge "Add atomics to vp8 synchronization primitives."
commit 03191f738e
@@ -191,47 +191,18 @@ static inline int sem_destroy(sem_t *sem) {
 #define x86_pause_hint()
 #endif

-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#define USE_MUTEX_LOCK 1
-#endif
-#endif

 #include "vpx_util/vpx_thread.h"
+#include "vpx_util/vpx_atomics.h"

-static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {
-  (void)mutex;
-#if defined(USE_MUTEX_LOCK)
-  int ret;
-  pthread_mutex_lock(mutex);
-  ret = *p;
-  pthread_mutex_unlock(mutex);
-  return ret;
-#endif
-  return *p;
-}
-
-static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,
-                             const int *last_row_current_mb_col,
-                             const int nsync) {
-  while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
+static INLINE void vp8_atomic_spin_wait(
+    int mb_col, const vpx_atomic_int *last_row_current_mb_col,
+    const int nsync) {
+  while (mb_col > (vpx_atomic_load_acquire(last_row_current_mb_col) - nsync)) {
     x86_pause_hint();
     thread_sleep(0);
   }
 }

-static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {
-  (void)mutex;
-#if defined(USE_MUTEX_LOCK)
-  pthread_mutex_lock(mutex);
-  *p = v;
-  pthread_mutex_unlock(mutex);
-  return;
-#endif
-  *p = v;
-}
-
-#undef USE_MUTEX_LOCK
 #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */

 #ifdef __cplusplus
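The hunk above replaces the mutex-guarded helpers (protected_read, protected_write, sync_read) with a single spin wait built on acquire loads. A minimal sketch of the row-synchronization pattern these primitives support follows; it is illustrative only, and every name other than the vpx_atomic_* / VPX_ATOMIC_INIT API introduced by this change (NUM_ROWS, row_progress, init_progress, decode_row) is hypothetical.

#include "vpx_util/vpx_atomics.h"

#define NUM_ROWS 4
/* Last finished macroblock column for each row (illustrative). */
static vpx_atomic_int row_progress[NUM_ROWS];

static void init_progress(void) {
  int i;
  for (i = 0; i < NUM_ROWS; ++i) vpx_atomic_init(&row_progress[i], -1);
}

static void decode_row(int mb_row, int mb_cols, int nsync) {
  int mb_col;
  for (mb_col = 0; mb_col < mb_cols; ++mb_col) {
    if (mb_row > 0) {
      /* Wait until the row above is at least nsync columns ahead; this
       * acquire load pairs with the release store below. */
      while (mb_col >
             vpx_atomic_load_acquire(&row_progress[mb_row - 1]) - nsync) {
        /* spin; the real code also calls x86_pause_hint()/thread_sleep(0) */
      }
    }
    /* ... decode macroblock (mb_row, mb_col) here ... */
    /* Publish progress so the row below may proceed. */
    vpx_atomic_store_release(&row_progress[mb_row], mb_col);
  }
}

The release store makes all writes performed while decoding (mb_row, mb_col) visible to a thread that later observes the new column value through the acquire load, which is the same guarantee the removed per-row pthread mutexes provided.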
@@ -1205,7 +1205,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
   pbi->frame_corrupt_residual = 0;

 #if CONFIG_MULTITHREAD
-  if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) &&
+      pc->multi_token_partition != ONE_PARTITION) {
     unsigned int thread;
     vp8mt_decode_mb_rows(pbi, xd);
     vp8_yv12_extend_frame_borders(yv12_fb_new);
@@ -68,7 +68,7 @@ typedef struct VP8D_COMP {
 #if CONFIG_MULTITHREAD
   /* variable for threading */

-  int b_multithreaded_rd;
+  vpx_atomic_int b_multithreaded_rd;
   int max_threads;
   int current_mb_col_main;
   unsigned int decoding_thread_count;
@@ -76,9 +76,8 @@ typedef struct VP8D_COMP {

   int mt_baseline_filter_level[MAX_MB_SEGMENTS];
   int sync_range;
-  int *mt_current_mb_col; /* Each row remembers its already decoded column. */
-  pthread_mutex_t *pmutex;
-  pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */
+  /* Each row remembers its already decoded column. */
+  vpx_atomic_int *mt_current_mb_col;

   unsigned char **mt_yabove_row; /* mb_rows x width */
   unsigned char **mt_uabove_row;
@@ -79,7 +79,8 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
     if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;
   }

-  for (i = 0; i < pc->mb_rows; ++i) pbi->mt_current_mb_col[i] = -1;
+  for (i = 0; i < pc->mb_rows; ++i)
+    vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1);
 }

 static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
@@ -247,12 +248,13 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

 static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
                               int start_mb_row) {
-  const int *last_row_current_mb_col;
-  int *current_mb_col;
+  const vpx_atomic_int *last_row_current_mb_col;
+  vpx_atomic_int *current_mb_col;
   int mb_row;
   VP8_COMMON *pc = &pbi->common;
   const int nsync = pbi->sync_range;
-  const int first_row_no_sync_above = pc->mb_cols + nsync;
+  const vpx_atomic_int first_row_no_sync_above =
+      VPX_ATOMIC_INIT(pc->mb_cols + nsync);
   int num_part = 1 << pbi->common.multi_token_partition;
   int last_mb_row = start_mb_row;

@@ -356,13 +358,11 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,

     for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
       if (((mb_col - 1) % nsync) == 0) {
-        pthread_mutex_t *mutex = &pbi->pmutex[mb_row];
-        protected_write(mutex, current_mb_col, mb_col - 1);
+        vpx_atomic_store_release(current_mb_col, mb_col - 1);
       }

       if (mb_row && !(mb_col & (nsync - 1))) {
-        pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1];
-        sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
       }

       /* Distance of MB to the various image edges.
@@ -548,7 +548,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
     }

     /* last MB of row is ready just after extension is done */
-    protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);
+    vpx_atomic_store_release(current_mb_col, mb_col + nsync);

     ++xd->mode_info_context; /* skip prediction column */
     xd->up_available = 1;
@@ -568,10 +568,10 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
   ENTROPY_CONTEXT_PLANES mb_row_left_context;

   while (1) {
-    if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break;
+    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break;

     if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
-      if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) {
+      if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) {
         break;
       } else {
         MACROBLOCKD *xd = &mbrd->mbd;
@@ -589,9 +589,8 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
   int core_count = 0;
   unsigned int ithread;

-  pbi->b_multithreaded_rd = 0;
+  vpx_atomic_init(&pbi->b_multithreaded_rd, 0);
   pbi->allocated_decoding_thread_count = 0;
-  pthread_mutex_init(&pbi->mt_mutex, NULL);

   /* limit decoding threads to the max number of token partitions */
   core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
@@ -602,7 +601,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
   }

   if (core_count > 1) {
-    pbi->b_multithreaded_rd = 1;
+    vpx_atomic_init(&pbi->b_multithreaded_rd, 1);
     pbi->decoding_thread_count = core_count - 1;

     CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
@@ -648,16 +647,6 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
   int i;

-  /* De-allocate mutex */
-  if (pbi->pmutex != NULL) {
-    for (i = 0; i < mb_rows; ++i) {
-      pthread_mutex_destroy(&pbi->pmutex[i]);
-    }
-
-    vpx_free(pbi->pmutex);
-    pbi->pmutex = NULL;
-  }
-
   vpx_free(pbi->mt_current_mb_col);
   pbi->mt_current_mb_col = NULL;

@@ -723,7 +712,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
   int i;
   int uv_width;

-  if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
     vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

     /* our internal buffers are always multiples of 16 */
@@ -741,17 +730,11 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {

     uv_width = width >> 1;

-    /* Allocate mutex */
-    CHECK_MEM_ERROR(pbi->pmutex,
-                    vpx_malloc(sizeof(*pbi->pmutex) * pc->mb_rows));
-    if (pbi->pmutex) {
-      for (i = 0; i < pc->mb_rows; ++i) {
-        pthread_mutex_init(&pbi->pmutex[i], NULL);
-      }
-    }
-
-    /* Allocate an int for each mb row. */
-    CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
+    /* Allocate a vpx_atomic_int for each mb row. */
+    CHECK_MEM_ERROR(pbi->mt_current_mb_col,
+                    vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));
+    for (i = 0; i < pc->mb_rows; ++i)
+      vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);

     /* Allocate memory for above_row buffers. */
     CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
@@ -792,9 +775,9 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {

 void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
   /* shutdown MB Decoding thread; */
-  if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
     int i;
-    protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0);
+    vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0);

     /* allow all threads to exit */
     for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
@@ -824,7 +807,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) {

     vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
   }
-  pthread_mutex_destroy(&pbi->mt_mutex);
 }

 void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {
@@ -1416,7 +1416,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
     vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);

 #if CONFIG_MULTITHREAD
-    if (cpi->b_multi_threaded) {
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
       pack_mb_row_tokens(cpi, &cpi->bc[1]);
     } else {
       vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
@@ -341,11 +341,11 @@ static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,

 #if CONFIG_MULTITHREAD
   const int nsync = cpi->mt_sync_range;
-  const int rightmost_col = cm->mb_cols + nsync;
-  const int *last_row_current_mb_col;
-  int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+  vpx_atomic_int rightmost_col = VPX_ATOMIC_INIT(cm->mb_cols + nsync);
+  const vpx_atomic_int *last_row_current_mb_col;
+  vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

-  if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0 && mb_row != 0) {
     last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
   } else {
     last_row_current_mb_col = &rightmost_col;
@@ -415,15 +415,13 @@ static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,
     vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);

 #if CONFIG_MULTITHREAD
-    if (cpi->b_multi_threaded != 0) {
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
       if (((mb_col - 1) % nsync) == 0) {
-        pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
-        protected_write(mutex, current_mb_col, mb_col - 1);
+        vpx_atomic_store_release(current_mb_col, mb_col - 1);
       }

       if (mb_row && !(mb_col & (nsync - 1))) {
-        pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];
-        sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
       }
     }
 #endif
@@ -563,8 +561,9 @@ static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,
                       xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

 #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded != 0) {
-    protected_write(&cpi->pmutex[mb_row], current_mb_col, rightmost_col);
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
+    vpx_atomic_store_release(current_mb_col,
+                             vpx_atomic_load_acquire(&rightmost_col));
   }
 #endif

@@ -749,13 +748,14 @@ void vp8_encode_frame(VP8_COMP *cpi) {
   vpx_usec_timer_start(&emr_timer);

 #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
     int i;

     vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei,
                               cpi->encoding_thread_count);

-    for (i = 0; i < cm->mb_rows; ++i) cpi->mt_current_mb_col[i] = -1;
+    for (i = 0; i < cm->mb_rows; ++i)
+      vpx_atomic_store_release(&cpi->mt_current_mb_col[i], -1);

     for (i = 0; i < cpi->encoding_thread_count; ++i) {
       sem_post(&cpi->h_event_start_encoding[i]);
@@ -26,11 +26,11 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) {
   VP8_COMMON *cm = &cpi->common;

   while (1) {
-    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

     if (sem_wait(&cpi->h_event_start_lpf) == 0) {
       /* we're shutting down */
-      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+      if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

       vp8_loopfilter_frame(cpi, cm);

@@ -48,7 +48,7 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
   ENTROPY_CONTEXT_PLANES mb_row_left_context;

   while (1) {
-    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

     if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) {
       const int nsync = cpi->mt_sync_range;
@@ -66,7 +66,7 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
       int *totalrate = &mbri->totalrate;

       /* we're shutting down */
-      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+      if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

       xd->mode_info_context = cm->mi + cm->mode_info_stride * (ithread + 1);
       xd->mode_info_stride = cm->mode_info_stride;
@@ -80,8 +80,8 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
       int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
      int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
       int map_index = (mb_row * cm->mb_cols);
-      const int *last_row_current_mb_col;
-      int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+      const vpx_atomic_int *last_row_current_mb_col;
+      vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

 #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
       vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
@@ -108,13 +108,11 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
       /* for each macroblock col in image */
       for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
         if (((mb_col - 1) % nsync) == 0) {
-          pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
-          protected_write(mutex, current_mb_col, mb_col - 1);
+          vpx_atomic_store_release(current_mb_col, mb_col - 1);
         }

         if (mb_row && !(mb_col & (nsync - 1))) {
-          pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];
-          sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
+          vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
         }

 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -286,7 +284,7 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
         vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16,
                           xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

-        protected_write(&cpi->pmutex[mb_row], current_mb_col, mb_col + nsync);
+        vpx_atomic_store_release(current_mb_col, mb_col + nsync);

         /* this is to account for the border */
         xd->mode_info_context++;
@@ -490,12 +488,10 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x,
 int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
   const VP8_COMMON *cm = &cpi->common;

-  cpi->b_multi_threaded = 0;
+  vpx_atomic_init(&cpi->b_multi_threaded, 0);
   cpi->encoding_thread_count = 0;
   cpi->b_lpf_running = 0;

-  pthread_mutex_init(&cpi->mt_mutex, NULL);
-
   if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
     int ithread;
     int th_count = cpi->oxcf.multi_threaded - 1;
@@ -526,7 +522,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
     CHECK_MEM_ERROR(cpi->en_thread_data,
                     vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

-    cpi->b_multi_threaded = 1;
+    vpx_atomic_store_release(&cpi->b_multi_threaded, 1);
     cpi->encoding_thread_count = th_count;

     /*
@@ -555,7 +551,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {

       if (rc) {
         /* shutdown other threads */
-        protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
+        vpx_atomic_store_release(&cpi->b_multi_threaded, 0);
         for (--ithread; ithread >= 0; ithread--) {
           pthread_join(cpi->h_encoding_thread[ithread], 0);
           sem_destroy(&cpi->h_event_start_encoding[ithread]);
@@ -569,8 +565,6 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
         vpx_free(cpi->mb_row_ei);
         vpx_free(cpi->en_thread_data);

-        pthread_mutex_destroy(&cpi->mt_mutex);
-
         return -1;
       }

@@ -585,7 +579,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {

       if (rc) {
         /* shutdown other threads */
-        protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
+        vpx_atomic_store_release(&cpi->b_multi_threaded, 0);
         for (--ithread; ithread >= 0; ithread--) {
           sem_post(&cpi->h_event_start_encoding[ithread]);
           sem_post(&cpi->h_event_end_encoding[ithread]);
@@ -603,8 +597,6 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
       vpx_free(cpi->mb_row_ei);
       vpx_free(cpi->en_thread_data);

-      pthread_mutex_destroy(&cpi->mt_mutex);
-
       return -2;
     }
   }
@@ -613,9 +605,9 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
 }

 void vp8cx_remove_encoder_threads(VP8_COMP *cpi) {
-  if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded)) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
     /* shutdown other threads */
-    protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
+    vpx_atomic_store_release(&cpi->b_multi_threaded, 0);
     {
       int i;

@@ -643,6 +635,5 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) {
     vpx_free(cpi->mb_row_ei);
     vpx_free(cpi->en_thread_data);
   }
-  pthread_mutex_destroy(&cpi->mt_mutex);
 }
 #endif
@@ -451,18 +451,6 @@ static void dealloc_compressor_data(VP8_COMP *cpi) {
   cpi->mb.pip = 0;

 #if CONFIG_MULTITHREAD
-  /* De-allocate mutex */
-  if (cpi->pmutex != NULL) {
-    VP8_COMMON *const pc = &cpi->common;
-    int i;
-
-    for (i = 0; i < pc->mb_rows; ++i) {
-      pthread_mutex_destroy(&cpi->pmutex[i]);
-    }
-    vpx_free(cpi->pmutex);
-    cpi->pmutex = NULL;
-  }
-
   vpx_free(cpi->mt_current_mb_col);
   cpi->mt_current_mb_col = NULL;
 #endif
@@ -1153,9 +1141,6 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {

   int width = cm->Width;
   int height = cm->Height;
-#if CONFIG_MULTITHREAD
-  int prev_mb_rows = cm->mb_rows;
-#endif

   if (vp8_alloc_frame_buffers(cm, width, height)) {
     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
@@ -1247,26 +1232,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
   if (cpi->oxcf.multi_threaded > 1) {
     int i;

-    /* De-allocate and re-allocate mutex */
-    if (cpi->pmutex != NULL) {
-      for (i = 0; i < prev_mb_rows; ++i) {
-        pthread_mutex_destroy(&cpi->pmutex[i]);
-      }
-      vpx_free(cpi->pmutex);
-      cpi->pmutex = NULL;
-    }
-
-    CHECK_MEM_ERROR(cpi->pmutex,
-                    vpx_malloc(sizeof(*cpi->pmutex) * cm->mb_rows));
-    if (cpi->pmutex) {
-      for (i = 0; i < cm->mb_rows; ++i) {
-        pthread_mutex_init(&cpi->pmutex[i], NULL);
-      }
-    }
-
     vpx_free(cpi->mt_current_mb_col);
     CHECK_MEM_ERROR(cpi->mt_current_mb_col,
                     vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
+    for (i = 0; i < cm->mb_rows; ++i)
+      vpx_atomic_init(&cpi->mt_current_mb_col[i], 0);
   }

 #endif
@@ -3274,7 +3244,7 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) {
   }

 #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
     sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
   }
 #endif
@@ -4471,7 +4441,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
 #endif

 #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
     /* start loopfilter in separate thread */
     sem_post(&cpi->h_event_start_lpf);
     cpi->b_lpf_running = 1;
@@ -4497,7 +4467,8 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
 #if CONFIG_MULTITHREAD
   /* wait that filter_level is picked so that we can continue with stream
    * packing */
-  if (cpi->b_multi_threaded) sem_wait(&cpi->h_event_end_lpf);
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded))
+    sem_wait(&cpi->h_event_end_lpf);
 #endif

   /* build the bitstream */
@@ -5341,7 +5312,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,

 #if CONFIG_MULTITHREAD
   /* wait for the lpf thread done */
-  if (cpi->b_multi_threaded && cpi->b_lpf_running) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) && cpi->b_lpf_running) {
     sem_wait(&cpi->h_event_end_lpf);
     cpi->b_lpf_running = 0;
   }
@@ -518,11 +518,9 @@ typedef struct VP8_COMP {

 #if CONFIG_MULTITHREAD
   /* multithread data */
-  pthread_mutex_t *pmutex;
-  pthread_mutex_t mt_mutex; /* mutex for b_multi_threaded */
-  int *mt_current_mb_col;
+  vpx_atomic_int *mt_current_mb_col;
   int mt_sync_range;
-  int b_multi_threaded;
+  vpx_atomic_int b_multi_threaded;
   int encoding_thread_count;
   int b_lpf_running;

@@ -415,7 +415,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
 #endif

 #if CONFIG_MULTITHREAD
-    if (pbi->b_multithreaded_rd) {
+    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
       vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
     }
 #else
vpx_util/vpx_atomics.h (new file, 133 lines)
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_UTIL_VPX_ATOMICS_H_
+#define VPX_UTIL_VPX_ATOMICS_H_
+
+#include "./vpx_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
+    (defined(__cplusplus) && __cplusplus >= 201112L)
+// Where available, use <stdatomic.h>
+#include <stdatomic.h>
+#define VPX_USE_STD_ATOMIC
+#else
+// Look for built-ins.
+#if !defined(__has_builtin)
+#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
+#endif  // !defined(__has_builtin)
+
+#if (__has_builtin(__atomic_load_n)) || \
+    (defined(__GNUC__) &&               \
+     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+// For GCC >= 4.7 and Clang that support __atomic builtins, use those.
+#define VPX_USE_ATOMIC_BUILTINS
+#else
+// Use platform-specific asm barriers.
+#if defined(_MSC_VER)
+// TODO(pbos): This assumes that newer versions of MSVC are building with the
+// default /volatile:ms (or older, where this is always true. Consider adding
+// support for using <atomic> instead of stdatomic.h when building C++11 under
+// MSVC. It's unclear what to do for plain C under /volatile:iso (inline asm?),
+// there're no explicit Interlocked* functions for only storing or loading
+// (presumably because volatile has historically implied that on MSVC).
+//
+// For earlier versions of MSVC or the default /volatile:ms volatile int are
+// acquire/release and require no barrier.
+#define vpx_atomic_memory_barrier() \
+  do {                              \
+  } while (0)
+#else
+#if ARCH_X86 || ARCH_X86_64
+// Use a compiler barrier on x86, no runtime penalty.
+#define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory")
+#elif ARCH_ARM
+#define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory")
+#elif ARCH_MIPS
+#define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory")
+#else
+#error Unsupported architecture!
+#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // defined(_MSC_VER)
+#endif  // atomic builtin availability check
+#endif  // stdatomic availability check
+
+// These are wrapped in a struct so that they are not easily accessed directly
+// on any platform (to discourage programmer errors by setting values directly).
+// This primitive MUST be initialized using vpx_atomic_init or VPX_ATOMIC_INIT
+// (NOT memset) and accessed through vpx_atomic_ functions.
+typedef struct vpx_atomic_int {
+#if defined(VPX_USE_STD_ATOMIC)
+  atomic_int value;
+#else
+  volatile int value;
+#endif  // defined(USE_STD_ATOMIC)
+} vpx_atomic_int;
+
+#if defined(VPX_USE_STD_ATOMIC)
+#define VPX_ATOMIC_INIT(num) \
+  { ATOMIC_VAR_INIT(num) }
+#else
+#define VPX_ATOMIC_INIT(num) \
+  { num }
+#endif  // defined(VPX_USE_STD_ATOMIC)
+
+// Initialization of an atomic int, not thread safe.
+static INLINE void vpx_atomic_init(vpx_atomic_int *atomic, int value) {
+#if defined(VPX_USE_STD_ATOMIC)
+  atomic_init(&atomic->value, value);
+#else
+  atomic->value = value;
+#endif  // defined(USE_STD_ATOMIC)
+}
+
+static INLINE void vpx_atomic_store_release(vpx_atomic_int *atomic, int value) {
+#if defined(VPX_USE_STD_ATOMIC)
+  atomic_store_explicit(&atomic->value, value, memory_order_release);
+#elif defined(VPX_USE_ATOMIC_BUILTINS)
+  __atomic_store_n(&atomic->value, value, __ATOMIC_RELEASE);
+#else
+  vpx_atomic_memory_barrier();
+  atomic->value = value;
+#endif  // defined(VPX_USE_STD_ATOMIC)
+}
+
+static INLINE int vpx_atomic_load_acquire(const vpx_atomic_int *atomic) {
+#if defined(VPX_USE_STD_ATOMIC)
+  // const_cast (in C) that doesn't trigger -Wcast-qual.
+  return atomic_load_explicit(
+      (atomic_int *)(uintptr_t)(const void *)&atomic->value,
+      memory_order_acquire);
+#elif defined(VPX_USE_ATOMIC_BUILTINS)
+  return __atomic_load_n(&atomic->value, __ATOMIC_ACQUIRE);
+#else
+  int v = atomic->value;
+  vpx_atomic_memory_barrier();
+  return v;
+#endif  // defined(VPX_USE_STD_ATOMIC)
+}
+
+#undef VPX_USE_STD_ATOMIC
+#undef VPX_USE_ATOMIC_BUILTINS
+#undef vpx_atomic_memory_barrier
+
+#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // VPX_UTIL_VPX_ATOMICS_H_
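As a usage note (illustrative only, not part of the patch): a vpx_atomic_int must be initialized with VPX_ATOMIC_INIT or vpx_atomic_init rather than memset, and a release store paired with an acquire load is enough to publish data between threads. The flag/payload/producer/consumer names below are hypothetical.

#include "vpx_util/vpx_atomics.h"

static vpx_atomic_int flag = VPX_ATOMIC_INIT(0);
static int payload;

static void producer(void) {
  payload = 42;                       /* plain write...                    */
  vpx_atomic_store_release(&flag, 1); /* ...published by the release store */
}

static void consumer(void) {
  while (vpx_atomic_load_acquire(&flag) == 0) {
    /* spin until the producer signals; the acquire load orders the read of
     * payload after flag is observed as 1 */
  }
  /* payload now reads 42 */
}

This mirrors how the decoder and encoder in this commit use b_multithreaded_rd / b_multi_threaded as a shutdown flag and mt_current_mb_col as per-row progress counters.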
@@ -8,6 +8,7 @@
 ## be found in the AUTHORS file in the root of the source tree.
 ##

+UTIL_SRCS-yes += vpx_atomics.h
 UTIL_SRCS-yes += vpx_util.mk
 UTIL_SRCS-yes += vpx_thread.c
 UTIL_SRCS-yes += vpx_thread.h