Use WelsMultipleEventsWaitSingleBlocking with a master event for waiting on finished threads

This allows using the same codepath for both unix and windows
for distributing new slices to code to threads.

This also improves the performance on unix - instead of waiting
for all the current threads to finish their current slice
before handing out a new slice to each of them (where the threads
that finish first will just wait instead of immediately getting
a new slice to work on), we now use the same logic as on windows.

In one setup, it improves the performance of encoding from ~920 fps
to ~950 fps, and in another setup it goes from ~390 fps to ~660 fps.
(These tests were done with the SM_ROWMB_SLICE mode, which
heavily exercises the code for distributing new slices to the
worker threads.)

The extra WelsEventSignal call on windows where it isn't strictly
necessary doesn't incur any measurable slowdown, so it is kept
without any extra ifdefs to keep the code more readable and unified.
This commit is contained in:
Martin Storsjö 2014-03-03 22:45:23 +02:00
parent d0a81355b0
commit 801da26d1d
3 changed files with 18 additions and 30 deletions

View File

@ -94,6 +94,7 @@ SSliceThreadPrivateData* pThreadPEncCtx;// thread context, [iThreadIdx]
char eventNamespace[100];
WELS_THREAD_HANDLE pThreadHandles[MAX_THREADS_NUM];// thread handles, [iThreadIdx]
WELS_EVENT pSliceCodedEvent[MAX_THREADS_NUM];// events for slice coded state, [iThreadIdx]
WELS_EVENT pSliceCodedMasterEvent; // events for signalling that some event in pSliceCodedEvent has been signalled
WELS_EVENT pReadySliceCodingEvent[MAX_THREADS_NUM]; // events for slice coding ready, [iThreadIdx]
WELS_EVENT pUpdateMbListEvent[MAX_THREADS_NUM]; // signal to update mb list neighbor for various slices
WELS_EVENT pFinUpdateMbListEvent[MAX_THREADS_NUM]; // signal to indicate finish updating mb list

View File

@ -3225,7 +3225,7 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
return ENC_RETURN_UNEXPECTED;
}
WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
// all slices are finished coding here
@ -3266,12 +3266,12 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
while (1) {
if (iIndexOfSliceToBeCoded >= iSliceCount && iNumThreadsRunning <= 0)
break;
#ifdef _WIN32
WELS_THREAD_ERROR_CODE lwait = 0;
int32_t iEventId = -1;
lwait = WelsMultipleEventsWaitSingleBlocking (iNumThreadsScheduled,
&pCtx->pSliceThreading->pSliceCodedEvent[0]);
&pCtx->pSliceThreading->pSliceCodedEvent[0],
&pCtx->pSliceThreading->pSliceCodedMasterEvent);
iEventId = (int32_t) (lwait - WELS_THREAD_ERROR_WAIT_OBJECT_0);
if (iEventId >= 0 && iEventId < iNumThreadsScheduled) {
if (iIndexOfSliceToBeCoded < iSliceCount) {
@ -3285,29 +3285,6 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
-- iNumThreadsRunning;
}
}
#else
// TODO for pthread platforms
// alternate implementation using blocking due non-blocking with timeout mode not support at wels thread lib, tune back if available
WelsMultipleEventsWaitAllBlocking (iNumThreadsRunning, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
if (iIndexOfSliceToBeCoded < iSliceCount) {
int32_t iThreadIdx = 0;
// pick up succeeding slices for threading if left
while (iThreadIdx < iNumThreadsScheduled) {
if (iIndexOfSliceToBeCoded >= iSliceCount)
break;
pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].iSliceIndex = iIndexOfSliceToBeCoded;
WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iThreadIdx]);
++ iIndexOfSliceToBeCoded;
++ iThreadIdx;
}
// update iNumThreadsRunning
iNumThreadsRunning = iThreadIdx;
} else {
iNumThreadsRunning = 0;
}
#endif//_WIN32
}//while(1)
// all slices are finished coding here
@ -3329,7 +3306,7 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
return ENC_RETURN_UNEXPECTED;
}
WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);

View File

@ -351,10 +351,11 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara
MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "encpEncCtx= 0x%p\n", (void*) (*ppCtx));
char name[SEM_NAME_MAX] = {0};
WELS_THREAD_ERROR_CODE err = 0;
iIdx = 0;
while (iIdx < iThreadNum) {
char name[SEM_NAME_MAX] = {0};
WELS_THREAD_ERROR_CODE err = 0;
pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) (*ppCtx);
pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
@ -386,6 +387,10 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara
++ iIdx;
}
WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d\n", name, err, errno);
(*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc (ppCtx))
@ -444,8 +449,8 @@ void ReleaseMtResource (sWelsEncCtx** ppCtx) {
if (NULL == pSmt)
return;
char ename[SEM_NAME_MAX] = {0};
while (iIdx < iThreadNum) {
char ename[SEM_NAME_MAX] = {0};
// length of semaphore name should be system constrained at least on mac 10.7
#ifdef _WIN32
if (pSmt->pThreadHandles != NULL && pSmt->pThreadHandles[iIdx] != NULL)
@ -467,6 +472,8 @@ void ReleaseMtResource (sWelsEncCtx** ppCtx) {
++ iIdx;
}
WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename);
WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
WelsMutexDestroy (&((*ppCtx)->mutexEncoderError));
@ -864,6 +871,8 @@ WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
WelsEventSignal (
&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
WelsEventSignal (
&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
} else { // for SM_DYN_SLICE parallelization
SSliceCtx* pSliceCtx = pCurDq->pSliceEncCtx;
const int32_t kiPartitionId = iThreadIdx;
@ -967,6 +976,7 @@ WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
break;
WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
}
}
#ifdef _WIN32